Update libflac to 1.4.2 There are a few bugs that SoundsManager is hitting while trying to use the demuxing code as well as some long standing minor security issues that weren't hit when using the encoder. Verified speech recognition was still working. Bug: b/263172872 Change-Id: I54611d48b38248fdcba940ef4a5978de2edf564a Fixed: 837495, 1391739

commit: f0889bc25fd0d154ea88841c4061a99e3113d8d7 [log] [tgz]
author: Dale Curtis <dalecurtis@chromium.org> Tue Jan 24 11:08:24 2023 -0800
committer: Dale Curtis <dalecurtis@chromium.org> Tue Jan 24 19:09:33 2023 +0000
tree: eabf5b3f1e35bbcd601343318583bdefa8825ae4
parent: 1222ddf5718c561df4376ac8a660b50a13aa1ff5 [diff]
diff --git a/AUTHORS b/AUTHORS
index ef22b50..3c37286 100644
--- a/AUTHORS
+++ b/AUTHORS

@@ -1,6 +1,6 @@
 /* FLAC - Free Lossless Audio Codec
  * Copyright (C) 2001-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * This file is part the FLAC project.  FLAC is comprised of several
  * components distributed under different licenses.  The codec libraries
@@ -17,17 +17,19 @@
  * distribution.
  */
 
-Current FLAC maintainer: Erik de Castro Lopo <erikd@mega-nerd.com>
+This file lists major contributors to the FLAC project. This list is not
+exhaustive. For an exhaustive list, run the command `git shortlog -s` on
+the git repo or visit https://gitlab.xiph.org/xiph/flac/-/graphs/master
+
+For a complete list of contributions, run the command `git log` on the
+git repo, visit https://github.com/xiph/flac/commits or visit
+https://gitlab.xiph.org/xiph/flac/commits
 
 Original author: Josh Coalson <jcoalson@users.sourceforge.net>
+Maintainer 2012-2020: Erik de Castro Lopo <erikd@mega-nerd.com>
 
 Website : https://www.xiph.org/flac/
 
-FLAC is an Open Source lossless audio codec originally developed by Josh Coalson
-between 2001 and 2009. From 2009 to 2012 FLAC was basically unmaintained. In
-2012 the Erik de Castro Lopo became the chief maintainer as part of the
-Xiph.Org Foundation.
-
 Other major contributors and their contributions:
 
 "lvqcl" <lvqcl@users.sourceforge.net>
@@ -56,3 +58,7 @@
 "Matt Zimmerman" <mdz@debian.org>
 * Libtool/autoconf/automake make system, flac man page
 
+"Martijn van Beurden" <mvanb1@gmail.com>
+* Compression improvements
+* Fuzzer improvements and fixes for fuzz findings
+* Implementation of 32 bps encoder and decoder

diff --git a/BUILD.gn b/BUILD.gn
index ae81550..b70693d 100644
--- a/BUILD.gn
+++ b/BUILD.gn

@@ -3,9 +3,7 @@
 # found in the LICENSE file.
 
 config("flac_config") {
-  defines = [
-    "FLAC__NO_DLL",
-  ]
+  defines = [ "FLAC__NO_DLL" ]
 }
 
 # Need a separate config to ensure the warnings are added to the end.
@@ -15,9 +13,13 @@
       # libflac converts between FLAC__StreamDecoderState and
       # FLAC__StreamDecoderInitStatus a lot in stream_decoder.c.
       "-Wno-conversion",
+
       # libflac contains constants that are only used in certain compile-time
       # cases, which triggers unused-const-variable warnings in other cases.
       "-Wno-unused-const-variable",
+
+      # libflac contains some questionable bit math.
+      "-Wno-incompatible-pointer-types",
     ]
   }
 }
@@ -36,23 +38,29 @@
     "include/share/alloc.h",
     "include/share/compat.h",
     "include/share/endswap.h",
+    "include/share/getopt.h",
+    "include/share/grabbag.h",
+    "include/share/grabbag/cuesheet.h",
+    "include/share/grabbag/file.h",
+    "include/share/grabbag/picture.h",
+    "include/share/grabbag/replaygain.h",
+    "include/share/grabbag/seektable.h",
+    "include/share/macros.h",
     "include/share/private.h",
-    "src/libFLAC/alloc.c",
+    "include/share/replaygain_analysis.h",
+    "include/share/replaygain_synthesis.h",
+    "include/share/safe_str.h",
+    "include/share/utf8.h",
     "src/libFLAC/bitmath.c",
     "src/libFLAC/bitreader.c",
     "src/libFLAC/bitwriter.c",
     "src/libFLAC/cpu.c",
     "src/libFLAC/crc.c",
     "src/libFLAC/fixed.c",
+    "src/libFLAC/fixed_intrin_sse2.c",
+    "src/libFLAC/fixed_intrin_ssse3.c",
     "src/libFLAC/float.c",
     "src/libFLAC/format.c",
-    "src/libFLAC/lpc.c",
-    "src/libFLAC/md5.c",
-    "src/libFLAC/memory.c",
-    "src/libFLAC/stream_decoder.c",
-    "src/libFLAC/stream_encoder.c",
-    "src/libFLAC/stream_encoder_framing.c",
-    "src/libFLAC/window.c",
     "src/libFLAC/include/private/all.h",
     "src/libFLAC/include/private/bitmath.h",
     "src/libFLAC/include/private/bitreader.h",
@@ -73,6 +81,24 @@
     "src/libFLAC/include/protected/all.h",
     "src/libFLAC/include/protected/stream_decoder.h",
     "src/libFLAC/include/protected/stream_encoder.h",
+    "src/libFLAC/lpc.c",
+    "src/libFLAC/lpc_intrin_avx2.c",
+    "src/libFLAC/lpc_intrin_fma.c",
+    "src/libFLAC/lpc_intrin_neon.c",
+    "src/libFLAC/lpc_intrin_sse2.c",
+    "src/libFLAC/lpc_intrin_sse41.c",
+    "src/libFLAC/lpc_intrin_vsx.c",
+    "src/libFLAC/md5.c",
+    "src/libFLAC/memory.c",
+    "src/libFLAC/metadata_iterators.c",
+    "src/libFLAC/metadata_object.c",
+    "src/libFLAC/stream_decoder.c",
+    "src/libFLAC/stream_encoder.c",
+    "src/libFLAC/stream_encoder_framing.c",
+    "src/libFLAC/stream_encoder_intrin_avx2.c",
+    "src/libFLAC/stream_encoder_intrin_sse2.c",
+    "src/libFLAC/stream_encoder_intrin_ssse3.c",
+    "src/libFLAC/window.c",
   ]
 
   configs -= [ "//build/config/compiler:chromium_code" ]
@@ -90,7 +116,8 @@
 
   defines = [
     "FLAC__OVERFLOW_DETECT",
-    "VERSION=\"1.3.1\"",
+    "FLAC__HAS_OGG=0",
+    "PACKAGE_VERSION=\"1.4.2\"",
     "HAVE_LROUND",
   ]
 
@@ -99,15 +126,14 @@
       "include/share/win_utf8_io.h",
       "src/share/win_utf8_io/win_utf8_io.c",
     ]
+
     # win_utf8_io.c defines this itself.
     configs -= [ "//build/config/win:lean_and_mean" ]
     cflags = [
       "/wd4334",  # 32-bit shift converted to 64 bits.
-      "/wd4267"   # Converting from size_t to unsigned on 64-bit.
+      "/wd4267",  # Converting from size_t to unsigned on 64-bit.
     ]
   } else {
-    defines += [
-      "HAVE_INTTYPES_H",
-    ]
+    defines += [ "HAVE_INTTYPES_H" ]
   }
 }

diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..cd0aa0b
--- /dev/null
+++ b/CHANGELOG.md

@@ -0,0 +1,950 @@
+# Changelog
+
+This changelog is not exhaustive, review [the git commit log](https://github.com/xiph/flac/commits) for an exhaustive list of changes.
+
+## FLAC 1.4.2 (22-Oct-2022)
+
+Once again, this release only has a few changes. A problem with FLAC playback in GStreamer (and possibly other libFLAC users) was the reason for the short time since the last release.
+
+* General
+    * Remove xmms plugin (Martijn van Beurden, TokyoBlackHole)
+    * Remove all pure assembler, removing build dependency on nasm
+    * Made console output more uniform across different platforms and CPUs
+    * Improve ability to tune compile for a certain system (for example with -march=native) when combining with --disable-asm-optimizations: plain C functions can now be better optimized
+* Build system
+    * Default CFLAGS are now prepended instead of dropped when user CFLAGS are set
+    * -msse2 is no longer added by default (was only applicable to x86)
+    * Fix cross-compiling and out-of-tree building when pandoc and doxygen are not available
+    * Fix issue with Clang not compiling functions with intrinsics
+    * Fix detection of bswap intrinsics (Ozkan Sezer)
+    * Improve search for libssp on MinGW (Ozkan Sezer, Martijn van Beurden)
+* libFLAC
+    * Fix issue when the libFLAC user seeks in a file instead of libFLAC itself
+
+## FLAC 1.4.1 (22-Sep-2022)
+
+This release only has a few changes. It was triggered by a problem in the 1.4.0 tarball: man pages were empty and API documentation was missing.
+
+* CMake fixes (Tomasz Kłoczko)
+* Add checks that man pages and api docs end up in tarball
+* Enable installation of prebuilt man pages and api docs
+* Fix compiler warnings (Johannes Kauffmann, Ozkan Sezer)
+* Fix format specifier (manxorist)
+* Enable building on Universal Windows Platform (Steve Lhomme)
+* Fix versioning from git
+
+## FLAC 1.4.0 (09-Sep-2022)
+
+As there have been changes to the library interfaces, the libFLAC version number is incremented to 12, the libFLAC++ version number is incremented to 10. As some changes were breaking, the version age numbers (see [libtool versioning](https://www.gnu.org/software/libtool/manual/libtool.html#Libtool-versioning)) have been reset to 0. For more details on the changes to the API, see the [porting guide](https://xiph.org/flac/api/group__porting__1__3__4__to__1__4__0.html).
+
+The XMMS plugin and 'common' plugin code (used only by the XMMS plugin) are deprecated, they will be removed in a future release.
+
+* General:
+    * It is now possible to limit the minimum bitrate of a FLAC file generated by libFLAC and with the `flac` tool to 1 bit/sample. This function can be used to aid live streaming, for example for internet radio
+    * Encoding files with sample rates up to 1'048'575Hz is now possible. (Con Kolivas)
+    * Compression of preset -3 through -8 was slightly improved at the cost of a small decrease in encoding speed by increasing the precision with which autocorrelation was calculated (Martijn van Beurden)
+    * Encoding speed of preset -0, -1 and -2 was slightly improved
+    * Compression of presets -1 and -4 was slightly improved on certain material by changing the adaptive mid-side heuristics
+    * Speedups specifically targeting 64-bit ARMv8 devices using NEON were integrated (Ronen Gvili, Martijn van Beurden)
+    * Speedups for x86_64 CPUs having the FMA instruction set extension are added
+    * Encoding and decoding of 32-bit PCM is now possible
+* (Ogg) FLAC format:
+    * The FLAC format document is being rewritten by the IETF CELLAR working group. The latest draft can be found on [https://datatracker.ietf.org/doc/draft-ietf-cellar-flac/](https://datatracker.ietf.org/doc/draft-ietf-cellar-flac/)
+    * The FLAC format document specifies no bounds for the residual. In order to match current decoder implementations, it is proposed to bound the residual to the range provided by a 32-bit int signed two's complement. This limit must be checked by FLAC encoders so as to keep FLAC decoders free from the complexity of being able to decode a residual exceeding a 32-bit int.
+    * There is now a set of files available to test whether a FLAC decoder implements the format correctly. This FLAC decoder testbench can be found at [https://github.com/ietf-wg-cellar/flac-test-files](https://github.com/ietf-wg-cellar/flac-test-files). Also, results of testing hard- and software can be found here at [https://wiki.hydrogenaud.io/index.php?title=FLAC_decoder_testbench](https://wiki.hydrogenaud.io/index.php?title=FLAC_decoder_testbench).
+* flac:
+    * The option --limit-min-bitrate was added to aid streaming, see [github #264](https://github.com/xiph/flac/pull/264)
+    * The option --keep-foreign-metadata-if-present is added. This option works the same as --keep-foreign-metadata, but does return a warning instead of an error if no foreign metadata was found to store or restore
+    * The warning returned by the foreign metadata handling is now clearer in case a user tries to restore foreign metadata of the wrong type, for example decoding a FLAC file containing AIFF foreign metadata to a WAV file
+    * A problem when using the analyse function causing the first frame to have a wrong size and offset was fixed
+    * Fix bug where channel mask of a file is unintentionally reused when several files are processed with one command
+    * The order of compression-related commands is no longer important, i.e. -8ep gives the same result as -ep8. Previously, a compression level (like -8) would override a more specific setting (like -e or -p). This is no longer the case
+    * flac now checks the block-align property of WAV files to ensure non-standard WAV files (for which flac has no handling) are not mangled
+* metaflac:
+    * (none)
+* build system:
+    * MSVC and Makefile.lite build system files have been removed. Building with MSVC (Visual Studio) can be done by using CMake
+    * Various CMake improvements, especially for creating MSVC build files (Martijn van Beurden, martinRenou, CookiePLMonster, David Callu, Tyler Dunn, Cameron Cawley)
+    * Various fixes for MinGW (Martijn van Beurden, Cameron Cawley)
+    * Removed obsolete autotools macros to silence warnings
+    * Fixes for FreeBSD PowerPC (pkubaj)
+    * Fixed some compiler warnings (Martijn van Beurden, Tyler Dunn)
+    * Fix building with uclibc (Fabrice Fontaine)
+* testing/validation:
+    * Addition of new encoder fuzzer, adding fuzzing for 8, 24 and 32-bit inputs
+    * Addition of new decoder fuzzer, adding coverage of seeking code
+    * Addition of metadata fuzzer, adding coverage of metadata APIs
+    * Various improvements to fuzzers to improve code coverage, fuzzing speed and stability
+    * Many changes to test suite to improve cross-platform compatibility (Rosen Penev)
+    * Windows CI now also builds the whole test suite
+    * Clang-format file added (Rosen Penev)
+    * Add warning on using v141_xp platform toolset with /MT (Martijn van Beurden, Paul Sanders)
+* libraries:
+    * Various seeking fixes (Martijn van Beurden, Robert Kausch)
+    * Various bugs fixed found by fuzzing
+    * On decoding, it is now checked whether residuals can be contained by a 32-bit int, preventing integer overflow
+    * Add check that samples supplied to libFLAC actually fall within the bps set
+    * Add checks when parsing metadata blocks to not allocate excessive amounts of memory and not overread
+    * Undocumented Windows-only utf8 functions are no longer exported to the DLL interface
+    * Removed all assembler and intrinsics code from the decoder to improve fuzzing, as they provided only a small speed benefit 
+    * The bitwriter buffer is limited in size to 2^24 bytes, so it cannot write excessively large files. This is a backup in case another bug in this area creeps (back) in.
+    * The metadata iterators should now never return a vorbiscomment entry with NULL as an entry; at least an empty string is now always returned
+* documentation:
+    * Removed html documentation and generate man pages from markdown
+* Interface changes:
+    * libFLAC:
+        * Addition of FLAC__stream_encoder_set_limit_min_bitrate() and FLAC__stream_encoder_get_limit_min_bitrate(), see [github #264](https://github.com/xiph/flac/pull/264)
+        * get_client_data_from_decoder is renamed FLAC__get_decoder_client_data(), see [github #124](https://github.com/xiph/flac/pull/124)
+        * All API functions taking a filename as an argument now take UTF-8 filenames on Windows, and no longer accept filenames using the current codepage
+        * FLAC__Frame struct has changed: warmup samples are now stored in FLAC__int64 instead of FLAC__int32 types, and verbatim samples can now be stored in either FLAC__int32 or FLAC__int64 depending on whether samples fit the former or latter
+        * The FLAC__StreamMetadata struct now has a tag, so it can be forward declared
+    * libFLAC++:
+        * Addition of ::set_limit_min_bitrate() and ::get_limit_min_bitrate(), see [github #264](https://github.com/xiph/flac/pull/264)
+        * All API functions taking a filename as an argument now take UTF-8 filenames on Windows, and no longer accept filenames using the current codepage
+        * The ::FLAC__Frame struct has changed, see the libFLAC interface change.
+
+## FLAC 1.3.4 (20-Feb-2022)
+
+This release mostly fixes (security related) bugs. When building with MSVC, using CMake is preferred, see the README under "Building with CMake" for more information. Building with MSVC using solution files is deprecated and these files will be removed in the future. As there have been no changes to the library interfaces, the libFLAC version number remains 11, and libFLAC++ version number remains 9.
+
+* General:
+    * Fix 12 decoder bugs found by oss-fuzz, including CVE-2020-0499 (erikd, Martijn van Beurden)
+    * Fix encoder bug CVE-2021-0561 (NeelkamalSemwal)
+    * Integrate oss-fuzzers (erikd, Guido Vranken)
+    * Seeking fixes (NeelkamalSemwal, Robert Kausch)
+    * Various fixes and improvements (Andrei Astafev, Rosen Penev, Håkan Kvist, oreo639, erikd, Tamás Zahola, Ulrik Mikaelsson, Tyler Dunn, tmkk)
+* FLAC format:
+    * (none)
+* Ogg FLAC format:
+    * (none)
+* flac:
+    * Various fixes and improvements (Andrei Astafev, Martijn van Beurden)
+* metaflac:
+    * (none)
+* build system:
+    * CMake improvements (evpobr, Vitaliy Kirsanov, erikd, Ozkan Sezer, Tyler Dunn, tg-m DeadSix27, ericLemanissier, Chocobo1).
+    * Fixes for MinGW and MSVC (Ozkan Sezer).
+    * Fix for clang (Ozkan Sezer)
+    * Fix for PowerPC (Peter Seiderer, Thomas BERNARD)
+    * Fix for FreeBSD PowerPC (pkubaj).
+* testing/validation:
+    * Add Windows target to CI, improve logging (Ralph Giles)
+    * CI improvements (Ralph Giles, Ewout ter Hoeven)
+* documentation:
+    * Doxygen fixes (Tyler Dunn)
+    * Fix typos (Tim Gates, maxz)
+* Interface changes:
+    * libFLAC:
+        * (none)
+    * libFLAC++:
+        * (none)
+
+## FLAC 1.3.3 (4-Aug-2019)  
+
+* General:
+    * Fix CPU detection (Janne Hyvärinen).
+    * Switch from unsigned types to uint32_t (erikd).
+    * CppCheck fixes (erikd).
+    * Improve SIMD decoding of 24 bit files (lvqcl).
+    * POWER8 and POWER9 improvements (Anton Blanchard).
+    * More tests.
+* FLAC format:
+    * (none)
+* Ogg FLAC format:
+    * (none)
+* flac:
+    * When converting to WAV, use WAVEFORMATEXTENSIBLE when bits per sample is not 8 or 16 (erikd).
+    * Fix --output-prefix with input-files in sub-directories (orbea).
+* metaflac:
+    * (none)
+* plugins:
+    * (none)
+* build system:
+    * Cmake support (Vitaliy Kirsanov, evpobr).
+    * Visual Studio updates (Janne Hyvärinen).
+    * Fix for MSVC when UNICODE is enabled (lvqcl).
+    * Fix for OpenBSD/i386 (Christian Weisgerber).
+* documentation:
+    * (none)
+* libraries:
+    * (none).
+* Interface changes:
+    * libFLAC:
+        * (none)
+    * libFLAC++:
+        * (none)
+
+## FLAC 1.3.2 (01-Jan-2017)  
+
+* General:
+    * Fix undefined behaviour using GCC/Clang UBSAN (erikd).
+    * General hardening via fuzz testing with AFL (erikd and others).
+    * General code improvements (lvqcl, erikd and others).
+    * Add FLAC in MP4 specification docs (Ralph Giles).
+    * MSVS build cleanups (lvqcl).
+    * Fix some cppcheck warnings (erikd).
+    * Assume all currently used OSes support SSE2.
+* FLAC format:
+    * (none)
+* Ogg FLAC format:
+    * (none)
+* flac:
+    * Fix potential infinite loop on flac-to-flac conversion (erikd).
+    * Add WAVEFORMATEXTENSIBLE to WAV (as needed) when decoding (lvqcl).
+    * Only write vorbis-comments if they are non-empty.
+    * Error out if decoding RAW with bits != (8|16|24).
+* metaflac:
+    * Add --scan-replay-gain option.
+* plugins:
+    * (none)
+* build system:
+    * Fixes for MSVC and Makefile.lite build systems.
+* documentation:
+    * (none)
+* libraries:
+    * CPU detection cleanup and fixes (Julian Calaby, erikd and lvqcl).
+    * Fix two stream decoder bugs (Max Kellermann).
+    * Fix a NULL dereference bug (on a malformed file).
+    * Changed the LPC order guess for a slight compression improvement, particularly for classical music (Martijn van Beurden).
+    * Improved encoding speed on older Intel CPUs.
+    * Fixed a seeking bug when decoding certain files (Miroslav Lichvar).
+    * Put an upper bound (32768) on the number of seek points.
+    * Fix potential memory leaks.
+    * Support 64bit brword/bwword allowing FLAC__BYTES_PER_WORD to be set to 8 (disabled by default).
+    * Fix an out-of-bounds heap read.
+    * Win32: Only use large buffers when writing to disk.
+* Interface changes:
+    * libFLAC:
+        * (none)
+    * libFLAC++:
+        * (none)
+
+## FLAC 1.3.1 (25-Nov-2014)  
+
+* General:
+    * Improved decoding efficiency of all bit depths but especially so for 24 bits for IA32 architecture (lvqcl and Miroslav Lichvar).
+    * Faster encoding using SSE and AVX (lvqcl).
+    * Fixed bartlett, bartlett_hann and triangle functions.
+    * New apodization functions partial_tukey and punchout_tukey for improved compression (Martijn van Beurden).
+    * Retuned compression presets to incorporate new apodization functions (Martijn van Beurden).
+    * Fix -Wcast-align warnings on armhf architecture (Erik de Castro Lopo).
+* FLAC format:
+    * (none)
+* Ogg FLAC format:
+    * (none)
+* flac:
+    * Help output documentation improvements.
+    * I/O buffering improvements on Windows to reduce disk fragmentation when writing files.
+    * Only write vorbis-comments if they are non-empty.
+* metaflac:
+    * (none)
+* plugins:
+    * Fix symbol visibility in XMMS plugin.
+* build system:
+    * Many fixes and improvements across all the build systems.
+* documentation:
+    * Document new [apodization windows](https://xiph.org/flac/documentation_tools_flac.html#flac_options_apodization).
+* libraries:
+    * Fix CVE-2014-9028 (heap write overflow) and CVE-2014-8962 (heap read overflow) (Erik de Castro Lopo).
+* Interface changes:
+    * libFLAC:
+        * (none)
+    * libFLAC++:
+        * (none)
+
+## FLAC 1.3.0 (26-May-2013)  
+
+* General:
+    * Move development to Xiph.org git repository.
+    * The <span class="argument">[--sector-align](https://xiph.org/flac/documentation_tools_flac.html#flac_options_sector_align)</span> option of <span class="commandname">flac</span> has been deprecated and may not exist in future versions. [shntool](http://www.etree.org/shnutils/shntool/) provides similar functionality.
+    * Support for the RF64 and Wave64 formats in <span class="commandname">flac</span> (see below).
+    * Better handling of cuesheets with non-CD-DA sample rates.
+    * The <span class="argument">[--ignore-chunk-sizes](https://xiph.org/flac/documentation_tools_flac.html#flac_options_ignore_chunk_sizes)</span> option has been added to the <span class="commandname">flac</span> command line tool.
+* FLAC format:
+    * (none)
+* Ogg FLAC format:
+    * (none)
+* flac:
+    * Added support for encoding from and decoding to the RF64 format, and a new corresponding option <span class="argument">[--force-rf64-format](https://xiph.org/flac/documentation_tools_flac.html#flac_options_force_rf64_format)</span>. ([SF #1762502](http://sourceforge.net/p/flac/feature-requests/78/)). <span class="argument">[--keep-foreign-metadata](https://xiph.org/flac/documentation_tools_flac.html#flac_options_keep_foreign_metadata)</span> is also supported.
+    * Added support for encoding from and decoding to the Sony Wave64 format, and a new corresponding option <span class="argument">[--force-wave64-format](https://xiph.org/flac/documentation_tools_flac.html#flac_options_force_wave64_format)</span>. ([SF #1769582](http://sourceforge.net/p/flac/feature-requests/79/)). <span class="argument">[--keep-foreign-metadata](https://xiph.org/flac/documentation_tools_flac.html#flac_options_keep_foreign_metadata)</span> is also supported.
+    * Added new options <span class="argument">[--preserve-modtime](https://xiph.org/flac/documentation_tools_flac.html#flac_options_preserve_modtime)</span> and <span class="argument">[--no-preserve-modtime](https://xiph.org/flac/documentation_tools_flac.html#negative_options)</span> to specify whether or not output files should copy the timestamp and permissions from their input files. The default is <span class="argument">[--preserve-modtime](https://xiph.org/flac/documentation_tools_flac.html#flac_options_preserve_modtime)</span> as in previous versions. ([SF #1805428](http://sourceforge.net/p/flac/feature-requests/85/)).
+    * Allow MM:SS:FF and MM:SS.SS time formats in non-CD-DA cuesheets. ([SF #1947353](http://sourceforge.net/p/flac/feature-requests/95/), [SF #2182432](http://sourceforge.net/p/flac/bugs/338/))
+    * The <span class="argument">[--sector-align](https://xiph.org/flac/documentation_tools_flac.html#flac_options_sector_align)</span> option of <span class="commandname">flac</span> has been deprecated and may not exist in future versions. [shntool](http://www.etree.org/shnutils/shntool/) provides similar functionality. ([SF #1805946](http://sourceforge.net/p/flac/feature-requests/86/))
+    * Improved error message when user attempts to decode a non-FLAC file ([SF #2222789](http://sourceforge.net/p/flac/bugs/341/)).
+    * Fix bug where <span class="commandname">flac</span> was disallowing use of <span class="argument">--replay-gain</span> when encoding from stdin ([SF #1840124](http://sourceforge.net/p/flac/bugs/313/)).
+    * Fix bug with fractional seconds on some locales ([SF #1815517](http://sourceforge.net/p/flac/bugs/309/), [SF #1858012](http://sourceforge.net/p/flac/bugs/321/)).
+    * Read and write appropriate channel masks for 6.1 and 7.1 surround input WAV files. Documentation was also updated.
+    * Correct Wave64 GUIDs.
+    * Support 56kHz to 192kHz gain analysis (patch from Earl Chew)
+    * Add ability to handle unicode filenames on Windows (large set of patches from Janne Hyvärinen)
+* metaflac:
+    * Allow MM:SS:FF and MM:SS.SS time formats in non-CD-DA cuesheets. ([SF #1947353](http://sourceforge.net/p/flac/feature-requests/95/), [SF #2182432](http://sourceforge.net/p/flac/bugs/338/))
+* plugins:
+    * Minor updates for XMMS plugin.
+    * Winamp2 plugin was dropped because Nullsoft has provided native FLAC support since 2006.
+* build system:
+    * Fixes for autotools (including [SF #1859664](http://sourceforge.net/p/flac/patches/28/)).
+    * Fixes for MinGW (including [SF #2000973](http://sourceforge.net/p/flac/bugs/), [SF #2209829](http://sourceforge.net/p/flac/bugs/)).
+    * Fixes for gcc (including [SF #1834168](http://sourceforge.net/p/flac/bugs/), [SF #2002481](http://sourceforge.net/p/flac/bugs/334/)).
+    * Fixes for Sun Studio/Forte ([SF #1701960](http://sourceforge.net/p/flac/patches/22/)).
+    * Fixes for windows builds (including [SF #1676822](http://sourceforge.net/p/flac/bugs/257/), [SF #1756624](http://sourceforge.net/p/flac/feature-requests/73/), [SF #1809863](http://sourceforge.net/p/flac/bugs/307/), [SF #1911149](http://sourceforge.net/p/flac/feature-requests/)).
+    * Fixes for FreeBSD and OpenBSD.
+    * Compile with GNU gcc _FORTIFY_SOURCE=2 and stack protection where those features are detected.
+    * Enable a bunch of GCC compiler warnings and fix code that generates warnings.
+* documentation:
+    * Document <span class="argument">[--ignore-chunk-sizes](https://xiph.org/flac/documentation_tools_flac.html#flac_options_ignore_chunk_sizes)</span> and <span class="argument">[--apply-replaygain-which-is-not-lossless](https://xiph.org/flac/documentation_tools_flac.html#flac_options_apply_replaygain_which_is_not_lossless)</span> option for <span class="commandname">flac</span>.
+* libraries:
+    * libFLAC encoder was defaulting to level 0 compression instead of 5 ([SF #1816825](http://sourceforge.net/p/flac/bugs/310/)).
+    * Fix bug in bitreader handling of read callback returning a short count ([SF #2490454](http://sourceforge.net/p/flac/bugs/345/)).
+    * Improve decoder's ability to distinguish between a FLAC sync code and an MPEG one ([SF #2491433](http://sourceforge.net/p/flac/bugs/346/)).
+* Interface changes:
+    * libFLAC:
+        * **Added** FLAC__format_blocksize_is_subset()
+    * libFLAC++:
+        * Add a number of convenience methods.
+
+## FLAC 1.2.1 (17-Sep-2007)  
+
+* General:
+    * With the new <span class="argument">[--keep-foreign-metadata](https://xiph.org/flac/documentation_tools_flac.html#flac_options_keep_foreign_metadata)</span> in <span class="commandname">flac</span>, non-audio RIFF and AIFF chunks can be stored in FLAC files and recreated when decoding. This allows, among other things, support for archiving BWF files and other WAVE files from editing tools that preserves all the metadata.
+* FLAC format:
+    * Specified 2 new APPLICATION metadata blocks for storing WAVE and AIFF chunks (for use with [--keep-foreign-metadata](https://xiph.org/flac/documentation_tools_flac.html#flac_options_keep_foreign_metadata) in <span class="commandname">flac</span>).
+    * The lead-out track number for non-CDDA cuesheets now must be 255.
+* Ogg FLAC format:
+    * This is not a format change, but changed default extension for Ogg FLAC from .ogg to .oga, according to new Xiph [specification](http://wiki.xiph.org/index.php/MIME_Types_and_File_Extensions) ([SF #1762492](http://sourceforge.net/p/flac/bugs/283/)).
+* flac:
+    * Added a new option <span class="argument">[--no-utf8-convert](https://xiph.org/flac/documentation_tools_flac.html#flac_options_no_utf8_convert)</span> which works like it does in <span class="commandname">metaflac</span> ([SF #973740](http://sourceforge.net/p/flac/feature-requests/35/)).
+    * Added a new option <span class="argument">[--keep-foreign-metadata](https://xiph.org/flac/documentation_tools_flac.html#flac_options_keep_foreign_metadata)</span> which can save/restore RIFF and AIFF chunks to/from FLAC files ([SF #363478](http://sourceforge.net/p/flac/feature-requests/9/)).
+    * Changed default extension for Ogg FLAC from .ogg to .oga, according to new Xiph [specification](http://wiki.xiph.org/index.php/MIME_Types_and_File_Extensions) ([SF #1762492](http://sourceforge.net/p/flac/bugs/283/)).
+    * Fixed bug where using <span class="argument">--replay-gain</span> without any padding option caused only a small PADDING block to be created ([SF #1760790](http://sourceforge.net/p/flac/bugs/282/)).
+    * Fixed bug where encoding from stdin on Windows could fail if WAVE/AIFF contained unknown chunks ([SF #1776803](http://sourceforge.net/p/flac/bugs/290/)).
+    * Fixed bug where importing non-CDDA cuesheets would cause an invalid lead-out track number ([SF #1764105](http://sourceforge.net/p/flac/bugs/286/)).
+* metaflac:
+    * Changed default extension for Ogg FLAC from .ogg to .oga, according to new Xiph [specification](http://wiki.xiph.org/index.php/MIME_Types_and_File_Extensions) ([SF #1762492](http://sourceforge.net/p/flac/bugs/283/)).
+    * Fixed bug where importing non-CDDA cuesheets would cause an invalid lead-out track number ([SF #1764105](http://sourceforge.net/p/flac/bugs/286/)).
+* plugins:
+    * (none)
+* build system:
+    * New configure option <span class="argument">--disable-cpplibs</span> to prevent building libFLAC++ ([SF #1723295](http://sourceforge.net/p/flac/patches/23/)).
+    * Fixed bug compiling <span class="commandname">flac</span> without Ogg support ([SF #1760786](http://sourceforge.net/p/flac/bugs/281/)).
+    * Fixed bug where sometimes an existing installation of flac could interfere with the build process ([SF #1763690](http://sourceforge.net/p/flac/bugs/285/)).
+    * OS X fixes ([SF #1786225](http://sourceforge.net/p/flac/patches/25/)).
+    * MinGW fixes ([SF #1684879](http://sourceforge.net/p/flac/bugs/264/)).
+    * Solaris 10 fixes ([SF #1783225](http://sourceforge.net/p/flac/bugs/294/) [SF #1783630](http://sourceforge.net/p/flac/bugs/295/)).
+    * OS/2 fixes ([SF #1771378](http://sourceforge.net/p/flac/bugs/287/) [SF #1229495](http://sourceforge.net/p/flac/bugs/174/)).
+    * automake-1.10 fixes ([SF #1791361](http://sourceforge.net/p/flac/bugs/300/) [SF #1792179](http://sourceforge.net/p/flac/bugs/302/)).
+* documentation:
+    * Added new [tutorial](https://xiph.org/flac/documentation_tools_flac.html#tutorial) section for <span class="commandname">flac</span>.
+    * Added [example code](https://xiph.org/flac/documentation_example_code.html) section for using libFLAC/libFLAC++.
+* libraries:
+    * libFLAC: Fixed very rare seek bug ([SF #1684049](http://sourceforge.net/p/flac/bugs/263/)).
+    * libFLAC: Fixed seek bug with Ogg FLAC and small streams ([SF #1792172](http://sourceforge.net/p/flac/bugs/301/)).
+    * libFLAC: 64-bit fixes ([SF #1790872](http://sourceforge.net/p/flac/bugs/299/)).
+    * libFLAC: Fix assembler code to be position independent.
+    * libFLAC: Optimization of a number of inner loop functions.
+    * Added support for encoding the residual coding method introduced in libFLAC 1.2.0 (RESIDUAL_CODING_METHOD_PARTITIONED_RICE2) which will encode 24-bit files more efficiently.
+* Interface changes:
+    * libFLAC:
+        * **Added** FLAC__metadata_simple_iterator_is_last()
+        * **Added** FLAC__metadata_simple_iterator_get_block_offset()
+        * **Added** FLAC__metadata_simple_iterator_get_block_length()
+        * **Added** FLAC__metadata_simple_iterator_get_application_id()
+    * libFLAC++:
+        * **Added** FLAC::Metadata::SimpleIterator::is_last()
+        * **Added** FLAC::Metadata::SimpleIterator::get_block_offset()
+        * **Added** FLAC::Metadata::SimpleIterator::get_block_length()
+        * **Added** FLAC::Metadata::SimpleIterator::get_application_id()
+
+## FLAC 1.2.0 (23-Jul-2007)  
+
+* General:
+    * Small encoding speedups for all modes.
+* FLAC format:
+    * One of the reserved bits in the FLAC frame header has been assigned for future use; make sure to refer to the [porting guide](https://xiph.org/flac/api/group__porting__1__1__4__to__1__2__0.html) if you parse FLAC streams manually.
+* Ogg FLAC format:
+    * (none)
+* flac:
+    * Added runtime detection of SSE OS support for most operating systems.
+    * Added a new undocumented option <span class="argument">--ignore-chunk-sizes</span> for ignoring the size of the 'data' chunk (WAVE) or 'SSND' chunk (AIFF). Can be used to encode files with bogus data sizes (e.g. with WAV files piped from foobar2000 to flac.exe as an external encoder). **Use with caution**: all subsequent data is treated as audio, so the data/SSND chunk must be the last or the following data/tags will be treated as audio and encoded.
+* metaflac:
+    * (none)
+* plugins:
+    * (none)
+* build system:
+    * Added solution and project files for building with VC++ 2005.
+* libraries:
+    * Added runtime detection of SSE OS support for most operating systems.
+    * Fixed bug where invalid seek tables could cause some seeks to fail.
+    * Added support for decoding the new residual coding method (RESIDUAL_CODING_METHOD_PARTITIONED_RICE2).
+* Interface changes (see also the [porting guide](https://xiph.org/flac/api/group__porting__1__1__4__to__1__2__0.html) for specific instructions on porting to FLAC 1.2.0):
+    * libFLAC:
+        * **Added** FLAC__format_sample_rate_is_subset()
+    * libFLAC++:
+        * **Added** FLAC::Decoder::Stream::get_decode_position()
+
+## FLAC 1.1.4 (13-Feb-2007)  
+
+* General:
+    * Improved compression with no change to format or decrease in speed.
+    * Encoding and decoding speedups for all modes. Encoding at -8 is twice as fast.
+* FLAC format:
+    * (none)
+* Ogg FLAC format:
+    * (none)
+* flac:
+    * Improved compression with no change to format or decrease in speed.
+    * Encoding and decoding speedups for all modes. Encoding at -8 is twice as fast.
+    * Added a new option <span class="argument">[-w,--warnings-as-errors](https://xiph.org/flac/documentation_tools_flac.html#flac_options_warnings_as_errors)</span> for treating all warnings as errors.
+    * Allow <span class="argument">[--picture](https://xiph.org/flac/documentation_tools_flac.html#flac_options_picture)</span> option to take only a filename, and have all other attributes extracted from the file itself.
+    * Fixed a bug that caused suboptimal default compression settings in some locales ([SF #1608883](http://sourceforge.net/p/flac/bugs/237/)).
+    * Fixed a bug where FLAC-to-FLAC transcoding of a corrupted FLAC file would truncate the transcoded file at the first error ([SF #1615019](http://sourceforge.net/p/flac/bugs/241/)).
+    * Fixed a bug where using <span class="argument">[-F](https://xiph.org/flac/documentation_tools_flac.html#flac_options_decode_through_errors)</span> with FLAC-to-FLAC transcoding of a corrupted FLAC would have no effect ([SF #1615391](http://sourceforge.net/p/flac/bugs/242/)).
+    * Fixed a bug where new PICTURE metadata blocks specified with <span class="argument">[--picture](https://xiph.org/flac/documentation_tools_flac.html#flac_options_picture)</span> would not be transferred during FLAC-to-FLAC transcoding ([SF #1627993](http://sourceforge.net/p/flac/bugs/246/)).
+* metaflac:
+    * Allow <span class="argument">[--import-picture-from](https://xiph.org/flac/documentation_tools_metaflac.html#metaflac_shorthand_import_picture_from)</span> option to take only a filename, and have all other attributes extracted from the file itself.
+* plugins:
+    * Fixed a bug in the XMMS plugin where Ctrl-3 (file info) would cause a crash if the file did not exist ([SF #1634941](http://sourceforge.net/p/flac/patches/20/)).
+* build system:
+    * Fixed a makefile linkage bug with libogg ([SF #1611414](http://sourceforge.net/p/flac/bugs/239/)).
+    * Added pkg-config files for libFLAC and libFLAC++ ([SF #1647881](http://sourceforge.net/p/flac/patches/21/)).
+    * Added <span class="argument">--disable-ogg</span> option for building without Ogg support even if libogg is installed ([SF #1196996](http://sourceforge.net/p/flac/bugs/165/)).
+* libraries:
+    * Completely rewritten bitbuffer which uses native machine word size instead of bytes for dramatic speed improvements. The speedup should be most dramatic on CPUs with slower byte manipulation capability and big-endian machines.
+    * Much faster Rice partition size estimation which greatly speeds encoding in higher modes.
+    * Increased compression for all modes.
+    * Reduced memory requirements for encoder and decoder.
+    * Fixed a bug with default apodization settings that were erroneous in some locales ([SF #1608883](http://sourceforge.net/p/flac/bugs/237/)).
+* Interface changes:
+    * libFLAC:
+        * (behavior only) FLAC__stream_encoder_set_metadata() now makes a copy of the "metadata" array of pointers (but still not copies of the objects themselves) so the client does not need to maintain its copy of the array after the call.
+    * libFLAC++:
+        * (none)
+
+## FLAC 1.1.3 (27-Nov-2006)  
+
+* General:
+    * Improved compression with no impact on format or decoding speed.
+    * Much better recovery for corrupted files
+    * Better multichannel support
+    * Large file (>2GB) support everywhere
+    * <span class="commandname">flac</span> now supports FLAC and Ogg FLAC as input to the encoder (e.g. can re-encode FLAC to FLAC) and preserve all the metadata like tags, etc.
+    * New <span class="code">[PICTURE](https://xiph.org/flac/format.html#def_PICTURE)</span> metadata block for storing things like cover art, new <span class="argument">[--picture](https://xiph.org/flac/documentation_tools_flac.html#flac_options_picture)</span> option to <span class="commandname">flac</span> and <span class="argument">[--import-picture-from](https://xiph.org/flac/documentation_tools_metaflac.html#metaflac_shorthand_import_picture_from)</span> option to <span class="commandname">metaflac</span> for importing pictures, new <span class="argument">[--export-picture-to](https://xiph.org/flac/documentation_tools_metaflac.html#metaflac_shorthand_export_picture_to)</span> option to <span class="commandname">metaflac</span> for exporting pictures, and metadata API [additions](https://xiph.org/flac/api/group__flac__metadata__level0.html#ga3) for searching for suitable pictures based on type, size and color constraints.
+    * Support for new <tt>REPLAYGAIN_REFERENCE_LOUDNESS</tt> tag.
+    * Fixed a bug in Ogg FLAC encoding where metadata was not being updated properly. Existing Ogg FLAC files should be recoded to fix up the metadata, e.g. <span class="command">flac -Vf -S 10s --ogg file.ogg</span>
+    * In the developer libraries, the interface has been simplified by merging the three decoding layers into a single class; ditto for the encoders. Also, libOggFLAC has been merged into libFLAC and libOggFLAC++ has been merged into libFLAC++ so there is a single API supporting both native FLAC and Ogg FLAC.
+* FLAC format:
+    * New <span class="code">[PICTURE](https://xiph.org/flac/format.html#def_PICTURE)</span> metadata block for storing things like cover art.
+    * Speaker assignments and channel orders for 3-6 channels (see [frame header](https://xiph.org/flac/format.html#frame_header)).
+    * Further restrictions on the [FLAC subset](https://xiph.org/flac/format.html#subset) when the sample rate is <=48kHz; in this case the maximum LPC order is now 12 and maximum blocksize is 4608\. This is to further limit the processing and memory requirements for hardware implementations while not measurably affecting compression.
+* Ogg FLAC format:
+    * (none)
+* flac:
+    * Improved the <span class="argument">[-F](https://xiph.org/flac/documentation_tools_flac.html#flac_options_decode_through_errors)</span> option to allow decoding of FLAC files whose metadata is corrupted, and other kinds of severe corruption.
+    * Encoder can now take FLAC and Ogg FLAC as input. The output FLAC file will have all the same metadata as the original unless overridden with options on the command line.
+    * Encoder can now take WAVEFORMATEXTENSIBLE WAVE files as input; decoder will output WAVEFORMATEXTENSIBLE WAVE files when necessary to conform to the latest Microsoft specifications.
+    * Now properly supports AIFF and WAVEFORMATEXTENSIBLE multichannel input, performing necessary channel reordering both for encoding and decoding. WAVEFORMATEXTENSIBLE channel mask is also saved to a tag on encoding and restored on decoding for situations when there is no natural mapping to FLAC channel assignments.
+    * Expanded support for "odd" sample resolutions to WAVE and AIFF input; all resolutions from 4 to 24 bits-per-sample now supported for all input types.
+    * Added a new option <span class="argument">[--tag-from-file](https://xiph.org/flac/documentation_tools_flac.html#flac_options_tag_from_file)</span> for setting a tag from file (e.g. for importing a cuesheet as a tag).
+    * Added a new option <span class="argument">[--picture](https://xiph.org/flac/documentation_tools_flac.html#flac_options_picture)</span> for adding pictures.
+    * Added a new option <span class="argument">[--apodization](https://xiph.org/flac/documentation_tools_flac.html#flac_options_apodization)</span> for specifying the window function(s) to be used in LPC analysis.
+    * Added support for encoding from non-compressed AIFF-C ([SF #1090933](http://sourceforge.net/p/flac/bugs/143/)).
+    * Importing of non-CDDA-compliant cuesheets now only issues a warning, not an error (see [here](http://www.hydrogenaud.io/forums/index.php?showtopic=31282)).
+    * MD5 comparison failures on decoding are now an error instead of a warning and will also return a non-zero exit code ([SF #1493725](http://sourceforge.net/p/flac/bugs/221/)).
+    * The default padding size is now 8K, or 64K if the input audio stream is more than 20 minutes long.
+    * Fixed a bug in cuesheet parsing where it would return an error if the last line of the cuesheet did not end with a newline.
+    * Fixed a bug that caused a crash when <span class="argument">-a</span> and <span class="argument">-t</span> were used together ([SF #1229481](http://sourceforge.net/p/flac/bugs/173/)).
+    * Fixed a bug with --sector-align where appended samples were not always totally silent ([SF #1237707](http://sourceforge.net/p/flac/bugs/179/)).
+    * Fixed bugs with --sector-align and raw input files.
+    * Fixed a bug printing out unknown AIFF subchunk names ([SF #1267476](http://sourceforge.net/p/flac/bugs/186/)).
+    * Fixed a bug where WAVE files with "data" subchunks of size 0 were accepted ([SF #1293830](http://sourceforge.net/p/flac/bugs/190/)).
+    * Fixed a bug where sync error at end-of-stream of truncated files was not being caught ([SF #1244071](http://sourceforge.net/p/flac/bugs/183/)).
+    * Fixed a problem with filename parsing if file does not have extension but also has a . in the path ([SF #1161916](http://sourceforge.net/p/flac/bugs/159/)).
+    * Fixed a problem with fractional-second parsing for <span class="argument">--skip</span>/<span class="argument">--until</span> in some locales ([SF #1031043](http://sourceforge.net/p/flac/bugs/125/)).
+    * Increase progress report rate when -p and -e are used together ([SF #1580122](http://sourceforge.net/p/flac/bugs/229/)).
+* metaflac:
+    * Added support for read-only operations on Ogg FLAC files.
+    * Added a new option <span class="argument">[--set-tag-from-file](https://xiph.org/flac/documentation_tools_metaflac.html#metaflac_shorthand_set_tag_from_file)</span> for setting a tag from file (e.g. for importing a cuesheet as a tag).
+    * Added a new option <span class="argument">[--import-picture-from](https://xiph.org/flac/documentation_tools_metaflac.html#metaflac_shorthand_import_picture_from)</span> for importing pictures.
+    * Added a new option <span class="argument">[--export-picture-to](https://xiph.org/flac/documentation_tools_metaflac.html#metaflac_shorthand_export_picture_to)</span> for exporting pictures.
+    * Added shorthand operation <span class="argument">[--remove-replay-gain](https://xiph.org/flac/documentation_tools_metaflac.html#metaflac_shorthand_remove_replay_gain)</span> for removing ReplayGain tags.
+    * <span class="argument">[--export-cuesheet-to](https://xiph.org/flac/documentation_tools_metaflac.html#metaflac_shorthand_export_cuesheet_to)</span> now properly specifies the FLAC file name ([SF #1272825](http://sourceforge.net/p/flac/feature-requests/46/)).
+    * Importing of non-CDDA-compliant cuesheets now issues a warning.
+    * Removed the following deprecated tag editing options; you should use the new option names shown instead:
+        * Removed <span class="argument">--show-vc-vendor</span>; use <span class="argument">--show-vendor-tag</span>
+        * Removed <span class="argument">--show-vc-field</span>; use <span class="argument">--show-tag</span>
+        * Removed <span class="argument">--remove-vc-all</span>; use <span class="argument">--remove-all-tags</span>
+        * Removed <span class="argument">--remove-vc-field</span>; use <span class="argument">--remove-tag</span>
+        * Removed <span class="argument">--remove-vc-firstfield</span>; use <span class="argument">--remove-first-tag</span>
+        * Removed <span class="argument">--set-vc-field</span>; use <span class="argument">--set-tag</span>
+        * Removed <span class="argument">--import-vc-from</span>; use <span class="argument">--import-tags-from</span>
+        * Removed <span class="argument">--export-vc-to</span>; use <span class="argument">--export-tags-to</span>
+    * Disallow multiple input FLAC files when --import-tags-from=- is used ([SF #1082577](http://sourceforge.net/p/flac/bugs/141/)).
+* plugins:
+    * When ReplayGain is on, if tags for the preferred kind of gain (album/track) are not in a stream, the other kind will be used.
+    * Added ReplayGain info to file info box in XMMS plugin
+    * Fixed UTF-8 decoder to disallow non-shortest-form and surrogate sequences (see [here](http://www.unicode.org/versions/corrigendum1.html)).
+* build system:
+    * Added support for building on OS/2 with EMX ([SF #1229495](http://sourceforge.net/p/flac/bugs/174/))
+    * Added support for building with Borland C++ ([SF #1599018](http://sourceforge.net/p/flac/patches/17/))
+    * Added a <span class="argument">--disable-xmms-plugin</span> option to <span class="command">configure</span> to prevent building the XMMS plugin ([SF #930494](http://sourceforge.net/p/flac/feature-requests/33/)).
+    * Added a <span class="argument">--disable-doxygen-docs</span> option to <span class="command">configure</span> for disabling Doxygen-based API doc generation ([SF #1365935](http://sourceforge.net/p/flac/patches/12/)).
+    * Added a <span class="argument">--disable-thorough-tests</span> option to <span class="command">configure</span> to do the basic library, stream, and tool tests in a reasonable time ([SF #1077948](http://sourceforge.net/p/flac/feature-requests/40/)).
+    * Added large file support with <span class="argument">AC_SYS_LARGEFILE</span>; use <span class="argument">--disable-largefile</span> with <span class="command">configure</span> to disable.
+* libraries:
+    * Merged libOggFLAC into libFLAC; both formats are now supported through the same API.
+    * Merged libOggFLAC++ into libFLAC++; both formats are now supported through the same API.
+    * libFLAC and libFLAC++: Simplified encoder setup with new <span class="argument">FLAC__stream_encoder_set_compression_level()</span> function.
+    * libFLAC: Improved compression with no impact on FLAC format or decoding time by adding a windowing stage before LPC analysis.
+    * libFLAC: Fixed a bug where missing STREAMINFO fields (min/max framesize, total samples, MD5 sum) and seek point offsets were not getting rewritten back to Ogg FLAC file ([SF #1338969](http://sourceforge.net/p/flac/bugs/197/)).
+    * libFLAC: Fixed a bug in cuesheet parsing where it would return an error if the last line of the cuesheet did not end with a newline.
+    * libFLAC: Fixed UTF-8 decoder to disallow non-shortest-form and surrogate sequences (see [here](http://www.unicode.org/versions/corrigendum1.html)).
+    * libFLAC: Fixed a bug in the return value for <span class="argument">FLAC__stream_decoder_set_metadata_respond_application()</span> and <span class="argument">FLAC__stream_decoder_set_metadata_ignore_application()</span> when there was a memory allocation error ([SF #1235005](http://sourceforge.net/p/flac/bugs/176/)).
+* Interface changes (see also the [porting guide](https://xiph.org/flac/api/group__porting__1__1__2__to__1__1__3.html) for specific instructions on porting to FLAC 1.1.3):
+    * all libraries:
+        * Merged libOggFLAC into libFLAC; both formats are now supported through the same API.
+        * Merged libOggFLAC++ into libFLAC++; both formats are now supported through the same API.
+        * Merged seekable stream decoder and file decoder into the stream decoder.
+        * Merged seekable stream encoder and file encoder into the stream encoder.
+        * Added #defines for the API version number to make porting easier; see <tt>include/lib*FLAC*/export.h</tt>.
+    * libFLAC:
+        * **Added** FLAC__stream_encoder_set_apodization()
+        * **Added** FLAC__stream_encoder_set_compression_level()
+        * **Added** FLAC__metadata_object_cuesheet_calculate_cddb_id()
+        * **Added** FLAC__metadata_get_cuesheet()
+        * **Added** FLAC__metadata_get_picture()
+        * **Added** FLAC__metadata_chain_read_ogg() and FLAC__metadata_chain_read_ogg_with_callbacks()
+        * **Changed** FLAC__stream_encoder_finish() now returns a FLAC__bool to signal a verify failure, or error processing last frame or updating metadata.
+        * **Changed** FLAC__StreamDecoderState: removed state FLAC__STREAM_DECODER_UNPARSEABLE_STREAM
+        * **Changed** FLAC__StreamDecoderErrorStatus: new error code FLAC__STREAM_DECODER_ERROR_STATUS_UNPARSEABLE_STREAM
+        * The above two changes mean that when the decoder encounters what it thinks are unparseable frames from a future decoder, instead of returning a fatal error with the FLAC__STREAM_DECODER_UNPARSEABLE_STREAM state, it just calls the error callback with FLAC__STREAM_DECODER_ERROR_STATUS_UNPARSEABLE_STREAM and leaves the behavior up to the application.
+    * libFLAC++:
+        * **Added** FLAC::Metadata::Picture
+        * **Added** FLAC::Encoder::Stream::set_apodization()
+        * **Added** FLAC::Encoder::Stream::set_compression_level()
+        * **Added** FLAC::Metadata::CueSheet::calculate_cddb_id()
+        * **Added** FLAC::Metadata::get_cuesheet()
+        * **Added** FLAC::Metadata::get_picture()
+        * **Changed** FLAC::Metadata::Chain::read() to accept a flag denoting Ogg FLAC input
+        * **Changed** FLAC::Decoder::Stream::finish() now returns a bool to signal an MD5 failure like FLAC__stream_decoder_finish() does.
+        * **Changed** FLAC::Encoder::Stream::finish() now returns a bool to signal a verify failure, or error processing last frame or updating metadata.
+    * libOggFLAC:
+        * Merged into libFLAC.
+    * libOggFLAC++:
+        * Merged into libFLAC++.
+
+## FLAC 1.1.2 (05-Feb-2005)  
+
+* General:
+    * Sped up decoding by a few percent overall.
+    * Sped up encoding when not using LPC (i.e. when using <span class="commandname">flac</span> options <span class="argument">-0</span>, <span class="argument">-1</span>, <span class="argument">-2</span>, or <span class="argument">-l 0</span>).
+    * Fixed a decoding bug that could cause sync errors with some ID3v1-tagged FLAC files.
+    * Added [HTML documentation for metaflac](https://xiph.org/flac/documentation_tools_metaflac.html#metaflac).
+* FLAC format:
+    * (none)
+* Ogg FLAC format:
+    * (none)
+* flac:
+    * New option <span class="argument">[--input-size](https://xiph.org/flac/documentation_tools_flac.html#flac_options_input_size)</span> to manually specify the input size when encoding raw samples from stdin.
+* metaflac:
+    * (none)
+* plugins:
+    * Added support for HTTP streaming in XMMS plugin. **NOTE**: there is a bug in the XMMS mpg123 plugin that hijacks FLAC streams; to fix it you will need to add the '.flac' extension to the list of exceptions in <span class="code">Input/mpg123/mpg123.c:is_our_file()</span> in the XMMS sources and recompile.
+* build system:
+    * (none)
+* libraries:
+    * libFLAC: Sped up Rice block decoding in the bitbuffer, resulting in decoding speed gains of a few percent.
+    * libFLAC: Sped up encoding when not using LPC (i.e. <span class="code">max_lpc_order == 0</span>).
+    * libFLAC: Trailing NUL characters maintained on Vorbis comment entries so they can be treated like C strings.
+    * libFLAC: More FLAC tag (i.e. Vorbis comment) validation.
+    * libFLAC: Fixed a bug in the logic that determines the frame or sample number in a frame header; the bug could cause sync errors with some ID3v1-tagged FLAC files.
+    * libFLAC, libOggFLAC: Can now be compiled to use only integer instructions, including encoding. The decoder is almost completely integer anyway but there were a couple places that needed a fixed-point replacement. There is no fixed-point version of LPC analysis yet, so if libFLAC is compiled integer-only, the encoder will behave as if the max LPC order is 0 (i.e. uses fixed predictors only). LPC decoding is supported in all cases as it always was integer-only.
+* Interface changes:
+    * libFLAC:
+        * **Changed:** Metadata object interface now maintains a trailing NUL on Vorbis comment entries for convenience.
+        * **Changed:** Metadata object interface now validates all Vorbis comment entries on input and returns false if an entry does not conform to the Vorbis comment spec.
+        * **Added** FLAC__format_vorbiscomment_entry_name_is_legal()
+        * **Added** FLAC__format_vorbiscomment_entry_value_is_legal()
+        * **Added** FLAC__format_vorbiscomment_entry_is_legal()
+        * **Added** FLAC__metadata_object_vorbiscomment_entry_from_name_value_pair()
+        * **Added** FLAC__metadata_object_vorbiscomment_entry_to_name_value_pair()
+        * **Changed** the signature of FLAC__metadata_object_vorbiscomment_entry_matches(): the first argument is now <span class="code">FLAC__StreamMetadata_VorbisComment_Entry entry</span> (was <span class="code">const FLAC__StreamMetadata_VorbisComment_Entry \*entry</span>), i.e. <span class="code">entry</span> is now pass-by-value.
+    * libFLAC++:
+        * **Changed:** Metadata object interface now maintains a trailing NUL on Vorbis comment values for convenience.
+        * **Changed:** Metadata object interface now validates all Vorbis comment entries on input and returns false if an entry does not conform to the Vorbis comment spec.
+        * **Changed:** All Metadata objects' operator=() methods now return a reference to themselves.
+        * **Added** methods to FLAC::Metadata::VorbisComment::Entry for setting comment values from null-terminated strings:
+            * Entry(const char \*field)
+            * Entry(const char \*field_name, const char \*field_value)
+            * bool set_field(const char \*field)
+            * bool set_field_value(const char \*field_value)
+        * **Changed** the signature of FLAC::Metadata::VorbisComment::get_vendor_string() and FLAC::Metadata::VorbisComment::set_vendor_string() to use a UTF-8, NUL-terminated string <span class="code">const FLAC__byte *</span> for the vendor string instead of <span class="code">FLAC::Metadata::VorbisComment::Entry</span>.
+        * **Added** FLAC::Metadata::*::assign() to all Metadata objects.
+        * **Added** bool FLAC::Metadata::get_tags(const char \*filename, VorbisComment &tags)
+    * libOggFLAC:
+        * (none)
+    * libOggFLAC++:
+        * (none)
+
+## FLAC 1.1.1 (01-Oct-2004)  
+
+* General:
+    * Ogg FLAC seeking now works
+    * New optimizations almost double the decoding speed on PowerPC (e.g. Mac G4/G5)
+    * A native OS X release thanks to updated Project Builder and autotools files
+* FLAC format:
+    * Made invalid the metadata block type 127 so that audio frames can always be distinguished from metadata by seeing 0xff as the first byte. (This was also required for the Ogg FLAC mapping.)
+* Ogg FLAC format:
+    * First official FLAC->Ogg bitstream mapping standardized (see new [FLAC-to-Ogg mapping specification](https://xiph.org/flac/ogg_mapping.html)). See the documentation for the <span class="argument">[--ogg](https://xiph.org/flac/documentation_tools_flac.html#flac_options_ogg)</span> switch about having to re-encode older Ogg FLAC files.
+* flac:
+    * Print an error when output file already exists instead of automatically overwriting.
+    * New option <span class="argument">[-f](https://xiph.org/flac/documentation_tools_flac.html#flac_options_force)</span> (<span class="argument">[--force](https://xiph.org/flac/documentation_tools_flac.html#flac_options_force)</span>) to force overwriting if the output file already exists.
+    * New option <span class="argument">[--cue](https://xiph.org/flac/documentation_tools_flac.html#flac_options_cue)</span> to select a specific section to decode using cuesheet track/index points.
+    * New option <span class="argument">[--totally-silent](https://xiph.org/flac/documentation_tools_flac.html#flac_options_totally_silent)</span> to suppress all output.
+    * New (but undocumented) option <span class="argument">--apply-replaygain-which-is-not-lossless</span> which applies ReplayGain to the decoded output. See [this thread](http://www.hydrogenaud.io/forums/index.php?showtopic=17293&st=11) for usage and caveats.
+    * When encoding to Ogg FLAC, use a random serial number (instead of 0 as was done before) when a serial number is not specified.
+    * When encoding multiple Ogg FLAC streams, <span class="argument">--serial-number</span> or random serial number sets the first number, which is then incremented for subsequent streams (before, the same serial number was used for all streams).
+    * Decoder no longer exits with an error when writing to stdout and the pipe is broken.
+    * Better explanation of common error messages.
+    * Default extension when writing AIFF files is .aif (before, it was .aiff).
+    * Write more common representation of SANE numbers in AIFF files.
+    * Bug fix: calculating ReplayGain on 48kHz streams.
+    * Bug fix: check for supported block alignments in WAVE files.
+    * Bug fix: "offset" field in AIFF SSND chunk properly handled.
+    * Bug fix: [#679166](http://sourceforge.net/p/flac/bugs/77/): flac doesn't respect RIFF subchunk padding byte.
+    * Bug fix: [#828391](http://sourceforge.net/p/flac/bugs/90/): --add-replay-gain segfaults.
+    * Bug fix: [#851155](http://sourceforge.net/p/flac/bugs/96/): Can't seek to position in flac file.
+    * Bug fix: [#851756](http://sourceforge.net/p/flac/bugs/97/): flac --skip --until reads entire file.
+    * Bug fix: [#877122](http://sourceforge.net/p/flac/bugs/100/): problem parsing cuesheet with CATALOG entry.
+    * Bug fix: [#896057](http://sourceforge.net/p/flac/bugs/104/): parsing ISRC number from cuesheet.
+* metaflac:
+    * Renamed the tag editing options as follows (the <span class="argument">...-vc-...</span> options still work but are deprecated):
+        * <span class="argument">--show-vc-vendor</span> becomes <span class="argument">--show-vendor-tag</span>
+        * <span class="argument">--show-vc-field</span> becomes <span class="argument">--show-tag</span>
+        * <span class="argument">--remove-vc-all</span> becomes <span class="argument">--remove-all-tags</span>
+        * <span class="argument">--remove-vc-field</span> becomes <span class="argument">--remove-tag</span>
+        * <span class="argument">--remove-vc-firstfield</span> becomes <span class="argument">--remove-first-tag</span>
+        * <span class="argument">--set-vc-field</span> becomes <span class="argument">--set-tag</span>
+        * <span class="argument">--import-vc-from</span> becomes <span class="argument">--import-tags-from</span>
+        * <span class="argument">--export-vc-to</span> becomes <span class="argument">--export-tags-to</span>
+    * Better explanation of common error messages.
+    * Bug fix: calculating ReplayGain on 48kHz streams.
+    * Bug fix: incorrect numbers when printing seek points.
+* plugins:
+    * Speed optimization in ReplayGain synthesis.
+    * Speed optimization in XMMS playback.
+    * Support for big-endian architectures in XMMS plugin.
+    * Removed support for ID3 tags.
+    * Bug fix: make hard limiter default to off in XMMS plugin.
+    * Bug fix: stream length calculation bug in XMMS plugin, Debian bug #200435.
+    * Bug fix: small memory leak in XMMS plugin.
+* build system:
+    * <span class="code">ordinals.h</span> is now static, not a build-generated file anymore.
+* libraries:
+    * libFLAC: PPC+Altivec optimizations of some decoder routines.
+    * libFLAC: Make stream encoder encode the blocksize and sample rate in the frame header if at all possible (not in STREAMINFO), even if subset encoding was not requested.
+    * libFLAC: Bug fix: fixed seek routine where infinite loop could happen when seeking past end of stream.
+    * libFLAC, libFLAC++: added methods to skip single frames, useful for quickly finding frame boundaries (see interface changes below).
+    * libOggFLAC, libOggFLAC++: New seekable-stream and file encoder and decoder APIs to match native FLAC APIs (see interface changes below).
+* Interface changes:
+    * libFLAC:
+        * **Added** FLAC__metadata_get_tags()
+        * **Added** callback-based versions of metadata editing functions:
+            * FLAC__metadata_chain_read_with_callbacks()
+            * FLAC__metadata_chain_write_with_callbacks()
+            * FLAC__metadata_chain_write_with_callbacks_and_tempfile()
+            * FLAC__metadata_chain_check_if_tempfile_needed()
+        * **Added** decoder functions for skipping single frames, also useful for quickly finding frame boundaries:
+            * FLAC__stream_decoder_skip_single_frame()
+            * FLAC__seekable_stream_decoder_skip_single_frame()
+            * FLAC__file_decoder_skip_single_frame()
+        * **Added** new required tell callback on seekable stream encoder:
+            * FLAC__SeekableStreamEncoderTellStatus and FLAC__SeekableStreamEncoderTellStatusString\[\]
+            * FLAC__SeekableStreamEncoderTellCallback
+            * FLAC__seekable_stream_encoder_set_tell_callback()
+        * **Changed** FLAC__SeekableStreamEncoderState by adding FLAC__SEEKABLE_STREAM_ENCODER_TELL_ERROR
+        * **Changed** Tell callback is now required to initialize seekable stream encoder
+        * **Deleted** erroneous and unimplemented FLAC__file_decoder_process_remaining_frames()
+    * libFLAC++:
+        * **Added** FLAC::Metadata::get_tags()
+        * **Added** decoder functions for skipping single frames, also useful for quickly finding frame boundaries:
+            * FLAC::Decoder::Stream::skip_single_frame()
+            * FLAC::Decoder::SeekableStream::skip_single_frame()
+            * FLAC::Decoder::File::skip_single_frame()
+        * **Added** encoder functions for setting metadata:
+            * FLAC::Encoder::Stream::set_metadata(FLAC::Metadata::Prototype **metadata, unsigned num_blocks)
+            * FLAC::Encoder::SeekableStream::set_metadata(FLAC::Metadata::Prototype **metadata, unsigned num_blocks)
+            * FLAC::Encoder::File::set_metadata(FLAC::Metadata::Prototype **metadata, unsigned num_blocks)
+        * **Added** new required tell callback on seekable stream encoder:
+            * pure virtual FLAC::Encoder::SeekableStream::tell_callback()
+        * **Changed** Tell callback is now required to initialize seekable stream encoder
+        * **Deleted** the following methods:
+            * FLAC::Decoder::Stream::State::resolved_as_cstring()
+            * FLAC::Encoder::Stream::State::resolved_as_cstring()
+    * libOggFLAC:
+        * **Added** OggFLAC__SeekableStreamDecoder interface
+        * **Added** OggFLAC__FileDecoder interface
+        * **Added** OggFLAC__SeekableStreamEncoder interface
+        * **Added** OggFLAC__FileEncoder interface
+        * **Added** OggFLAC__stream_decoder_get_resolved_state_string()
+        * **Added** OggFLAC__stream_encoder_get_resolved_state_string()
+        * **Added** OggFLAC__stream_encoder_set_metadata_callback()
+        * **Changed** OggFLAC__StreamDecoderState by adding OggFLAC__STREAM_DECODER_END_OF_STREAM
+    * libOggFLAC++:
+        * **Added** OggFLAC::Decoder::SeekableStream interface
+        * **Added** OggFLAC::Decoder::File interface
+        * **Added** OggFLAC::Encoder::SeekableStream interface
+        * **Added** OggFLAC::Encoder::File interface
+        * **Added** OggFLAC::Decoder::Stream::get_resolved_state_string()
+        * **Added** OggFLAC::Encoder::Stream::get_resolved_state_string()
+        * **Added** pure virtual OggFLAC::Encoder::Stream::metadata_callback()
+
+## FLAC 1.1.0 (26-Jan-2003)  
+
+General:
+
+* All code is now [Valgrind](http://valgrind.org/)-clean!
+* New [CUESHEET](https://xiph.org/flac/format.html#def_CUESHEET) metadata block for storing CD TOC and index point information. Now a CD can be completely backed up to a single FLAC file for archival.
+* [ReplayGain](http://www.replaygain.org/) support.
+* Better compression of 24-bit files.
+* More complete AIFF support.
+* 3DNow! optimizations enabled by default.
+* Complete MSVC build system with .dsp projects for everything, which can build both static libs and DLLs, and in debug or release mode, all in the same source tree.
+    
+<span class="commandname">flac</span>:
+
+* Can now decode FLAC to AIFF; new <span class="argument">--force-aiff-format</span> option.
+* New <span class="argument">--cuesheet</span> option for reading and storing a cuesheet when encoding a whole CD. Automatically creates seek points for track and index points unless <span class="argument">--no-cued-seekpoints</span> is used.
+* New <span class="argument">--replay-gain</span> option for calculating ReplayGain values and storing them as tags.
+* New <span class="argument">--until</span> option complements <span class="argument">--skip</span> to stop decoding at a specified point in the stream.
+* <span class="argument">--skip</span> and <span class="argument">--until</span> now also accept mm:ss.ss format.
+* New <span class="argument">-S #s</span> flavor to specify seekpoints every '#' number of seconds.
+* <span class="commandname">flac</span> now defaults to <span class="argument">-S 10s</span> instead of <span class="argument">-S 100x</span> for the seek table.
+* <span class="commandname">flac</span> now adds a 4k PADDING block by default (turn off with <span class="argument">--no-padding</span>).
+* Fixed a bug with --skip and AIFF-to-FLAC encoding.
+* Fixed a bug where decoding a FLAC file whose total_samples==0 in the STREAMINFO would corrupt the WAVE header.
+
+<span class="commandname">metaflac</span>:
+
+* New <span class="argument">--import-cuesheet-from</span> option for reading and storing a cuesheet to a FLAC-encoded CD. Automatically creates seek points for track and index points unless <span class="argument">--no-cued-seekpoints</span> is used.
+* New <span class="argument">--export-cuesheet-to</span> option for writing a cuesheet from a FLAC file for use with CD authoring software.
+* New <span class="argument">--add-replay-gain</span> option for calculating ReplayGain values and storing them as tags.
+* New <span class="argument">--add-seekpoint</span> option to add seekpoints to an existing FLAC file. Includes new <span class="argument">--add-seekpoint=#s</span> flavor to add seekpoints every '#' number of seconds.
+
+XMMS plugin:
+
+* Configurable sample resolution conversion with dither.
+* ReplayGain support with customizable noise shaping, pre-amp, and optional hard limiter.
+* New Vorbis comment editor.
+* File info now works.
+* Bitrate now shows the smoothed instantaneous bitrate.
+* Uses the ARTIST tag if there is no PERFORMER tag.
+
+Winamp2 plugin:
+
+* Configurable sample resolution conversion with dither.
+* ReplayGain support with customizable noise shaping, pre-amp, and optional hard limiter.
+* File info now works.
+* Uses the ARTIST tag if there is no PERFORMER tag.
+
+Libraries (developers take note!):
+
+* All code and tests are instrumented for Valgrind. All tests run Valgrind-clean, meaning no memory leaks or buffer over/under-runs.
+* Separate 64-bit datapath through the filter in <span class="commandname">libFLAC</span> for better compression of >16 bps files.
+* <span class="code">FLAC__metadata_object_new(FLAC__METADATA_TYPE_VORBIS_COMMENT)</span> now sets the vendor string.
+* The documentation on the usage of <span class="code">FLAC::Iterator::get_block()</span> in <span class="commandname">libFLAC++</span> has an important correction. If you use this class make sure to read [this](https://xiph.org/flac/api/group__flacpp__metadata__level2.html).
+
+## FLAC 1.0.4 (24-Sep-2002)  
+
+Plugins:
+
+* Support for Vorbis comments, ID3 v1 and v2 tags.
+* Configurable title formatting and charset conversion in XMMS plugin.
+* Support for 8- and 24-bit FLAC files. There is a compile-time option for raw 24-bit output or 24bps-to-16bps linear dithering (the default).
+
+<span class="commandname">flac</span>:
+
+* Improved option parser (now uses getopt).
+* AIFF input support (thanks to Brady Patterson).
+* Small decoder speedup.
+* <span class="argument">--sector-align</span> now supported for raw input files.
+* New -T, --tag options for adding Vorbis comments while encoding.
+* New --serial-number option for use with --ogg.
+* Automatically writes vendor string in Vorbis comments.
+* Drastically reduced memory requirements.
+* Fixed bug where extra fmt/data chunks that were supposed to be skipped were not getting skipped.
+* Fixed bug in granulepos setting for Ogg FLAC streams.
+* Fixed memory leak when encoding multiple files with -V.
+
+<span class="commandname">metaflac</span>:
+
+* UTF-8 support in Vorbis comments.
+* New --import-vc-from and --export-vc-to commands for importing/exporting Vorbis comments from/to a file. For example, the following can be used to copy tags back and forth:  
+<span class="code">metaflac --export-vc-to=- --no-utf8-convert file.flac | vorbiscomment --raw -w file.ogg  
+vorbiscomment --raw -l file.ogg | metaflac --import-vc-from=- --no-utf8-convert file.flac  
+</span>
+* Fixed [bug #606796](http://sourceforge.net/p/flac/bugs/54/) where <span class="commandname">metaflac</span> was failing on read-only files.
+
+Libraries:
+
+* All APIs now meticulously documented via Doxygen. [See here](https://xiph.org/flac/api/index.html).
+* New <span class="commandname">libOggFLAC</span> and <span class="commandname">libOggFLAC++</span> libraries. These wrap around <span class="commandname">libFLAC</span> to provide encoding and decoding of Ogg FLAC streams, providing interfaces similar to the ones of the native FLAC libraries. These are also documented via Doxygen.
+* New FLAC__SeekableStreamEncoder and FLAC__FileEncoder in <span class="commandname">libFLAC</span> simplify common encoding tasks.
+* New verify mode in all encoders.
+* FLAC__stream_encoder_finish() now resets the defaults just like the stream decoders.
+* Drastically reduced memory requirements of encoders and decoders.
+* Encoder now automatically writes vendor string in VORBIS_COMMENT block.
+* Encoding speedup of fixed predictors and MD5 speedup for 16bps mono/stereo signals on x86 (thanks to Miroslav Lichvar).
+* Fixed bug in metadata interface where a bps in STREAMINFO > 16 was incorrectly parsed.
+* Fixed bug where aborting stream decoder could cause infinite loop.
+* Behavior change: simplified decoder \*_process() commands.
+* Behavior change: calling FLAC__stream_encoder_init() calls write callback once for "fLaC" signature and once for each metadata block.
+* Behavior change: deprecated do_escape_coding and rice_parameter_search_distance in encoder.
+
+## FLAC 1.0.3 (03-Jul-2002)  
+
+New features:
+
+* 24-bit input support restored in <span class="commandname">flac</span>.
+* Decoder speedup in <span class="commandname">libFLAC</span>, which is directly passed on to the command-line decoder and plugins.
+* New <span class="argument">-F</span> option to <span class="commandname">flac</span> to continue decoding in spite of errors.
+* Correctly set granulepos in Ogg packets so seeking Ogg FLAC streams will be easier.
+* New [VORBIS_COMMENT](https://xiph.org/flac/format.html#metadata_block_vorbis_comment) metadata block for tagging with Vorbis-style comments.
+* Vastly improved <span class="commandname">metaflac</span>, now with many editing and tagging options.
+* Partial id3v1 support in Winamp plugins.
+* Updated Winamp 3 plugin.
+* Note: new semantics for -P option in <span class="commandname">flac</span>.
+* Note: removed -R option in <span class="commandname">flac</span>.
+
+New library features:
+
+* Previously mentioned decoder speedup in <span class="commandname">libFLAC</span>.
+* New metadata interface to <span class="commandname">libFLAC</span> for manipulating metadata in FLAC files.
+* New <span class="commandname">libFLAC++</span> API, an object wrapper around <span class="commandname">libFLAC</span>.
+* New [VORBIS_COMMENT](https://xiph.org/flac/format.html#metadata_block_vorbis_comment) metadata block for tagging with Vorbis-style comments.
+* Customizable metadata filtering by type in decoders.
+* Stream encoder can take an arbitrary list of metadata blocks, instead of just one SEEKTABLE and/or PADDING block.
+
+Bugs fixed:
+
+* Fixed bug with using pipes under Windows.
+* Fixed several bugs in the plugins and made them more robust in general.
+* Fixed bug in <span class="commandname">flac</span> where decoding to WAVE of a FLAC file with 0 for total_samples in the STREAMINFO block yielded a WAVE chunk of 0 size.
+* Fixed bug in Ogg packet numbering.
+
+## FLAC 1.0.2 (03-Dec-2001)  
+
+* This release is only to fix a bug that was causing some of the plugins to crash sporadically. It can also affect <span class="commandname">libFLAC</span> users that reuse one file decoder instance for multiple files.
+
+## FLAC 1.0.1 (14-Nov-2001)  
+
+New features for users:
+
+* Support for Ogg-FLAC, i.e. <span class="commandname">flac</span> can now read and write FLAC streams using Ogg as the transport layer.
+* New Winamp 3 plugin based on the Wasabi Beta 1 SDK.
+* New utilities for adding FLAC support to the Monkey's Audio GUI (see [how](https://xiph.org/flac/documentation_tasks.html#monkey)).
+* Mac OS X support. The download area now contains an OS X binary release.
+* Mingw32 support.
+* Better handling of MS-specific 'fmt' chunks in WAVE files.
+
+New features for developers:
+
+* Added a SeekableStreamDecoder layer between StreamDecoder and FileDecoder. This makes it easier to use libFLAC in situations where files have been abstracted away. See the latest [documentation](https://xiph.org/flac/api/index.html) for more. The interfaces for the StreamDecoder and FileDecoder remain the same and are still binary-compatible with libFLAC 1.0.
+* Drastically reduced the stack requirements of the encoder.
+
+Bug fixes:
+
+* Fixed a serious bug with <span class="commandname">flac</span> and raw input where the encoder was trying to rewind when it shouldn't, which would add 12 junk samples to the encoded file. This was not present in WAVE encoding.
+* Fixed a minor bug in <span class="commandname">libFLAC</span> with setting the file name to stdin on a file decoder.
+* Fixed a minor bug in <span class="commandname">libFLAC</span> where multiple calls to setting the file name on a file decoder caused leaked memory.
+* Fixed a minor bug in <span class="commandname">metaflac</span>, now correctly skips an id3v2 tag if present.
+* Fixed a minor bug in <span class="commandname">metaflac</span>, now correctly skips long metadata blocks.
+
+## FLAC 1.0 (20-Jul-2001)  
+
+It's finally here. There are a few new features but mostly it is minor bug fixes since 0.10:
+
+* New '--sector-align' option to <span class="commandname">flac</span> which aligns a group of encoded files on CD audio sector boundaries.
+* New '--output-prefix' option to <span class="commandname">flac</span> to allow the user to prepend a prefix to all output filenames (useful, for example, for encoding/decoding to a different directory).
+* Better WAVE autodetection (doesn't rely on ungetc() anymore).
+* Cleaner one-line encoding/decoding stats.
+* Changes to the libFLAC interface and type names to make binary compatibility easier to maintain in the future.
+* New '--sse-os' option to 'configure' to enable faster SSE-based routines.
+* Another (hopefully last) fix to the Winamp 2 plugin.
+* Slightly improved Rice parameter estimation.
+* Bug fixes for some very rare corner cases when encoding.
+
+## FLAC 0.10 (07-Jun-2001)  
+
+This is probably the final beta. There have been many improvements in the last two months:
+
+* Both the encoder and decoder have been significantly sped up. Aside from C improvements, the code base now has an assembly infrastructure that allows assembly routines for different architectures to be easily integrated. Many key routines now have faster IA-32 implementations (thanks to Miroslav).
+* A new metadata block [SEEKTABLE](https://xiph.org/flac/format.html#def_SEEKTABLE) has been defined to hold an arbitrary number of seek points, which speeds up seeking within a stream.
+* <span class="commandname">flac</span> now has a command-line usage similar to 'gzip'; make sure to see the latest [documentation](https://xiph.org/flac/documentation.html) for the new usage. It also attempts to preserve the input file's timestamp and permissions.
+* The -# options in <span class="commandname">flac</span> have been tweaked to yield the best compression-to-encode-time ratios. The new default is -5.
+* <span class="commandname">flac</span> can now usually autodetect WAVE files when encoding so that -fw is usually not needed when encoding from stdin.
+* The WAVE reader in <span class="commandname">flac</span> now just ignores (with a warning) unsupported sub-chunks instead of aborting with an error.
+* Added an option '--delete-input-file' to <span class="commandname">flac</span> which automatically deletes the input after a successful encode/decode.
+* Added an option '-o' to <span class="commandname">flac</span> to force the output file name (the old usage of "flac - outputfilename" is no longer supported).
+* Changed the XMMS plugin to send smaller chunks of samples (now 512) so that visualization is not slow.
+* Fixed a bug in the stream decoder where the decoded samples counter got corrupted after a seek.
+
+## FLAC 0.9 (31-Mar-2001)  
+
+Bug fixes and some new features:
+
+* FLAC's sync code has been lengthened to 14 bits from 9 bits. This should enable a faster and more robust synchronization mechanism.
+* Two reserved bits were added to the frame header.
+* A CRC-16 was added to the FLAC frame footer, and the decoder now does frame integrity checking based on the CRC.
+* The format now includes a new subframe field to indicate when a subblock has one or more 0 LSBs for all samples. This increases compression on some kinds of data.
+* Added two options to the analysis mode, one for including the residual signal in the analysis file, and one for generating gnuplot files of each subframe's residual distribution with some statistics. See the latest [documentation](https://xiph.org/flac/documentation.html#analysis_options).
+* XMMS plugin now supports 8-bit files.
+* Fixed a bug in the Winamp2 plugin where the audio sounded garbled.
+* Fixed a bug in the Winamp2 plugin where Winamp would hang sporadically at the end of a track (c.f. [bug #231197](http://sourceforge.net/projects/flac/&atid=113478)).
+
+## FLAC 0.8 (05-Mar-2001)  
+
+Changes since 0.7:
+
+* Created a new utility called <span class="commandname">metaflac</span>. It is a metadata editor for .flac files. Right now it just lists the contents of the metadata blocks but eventually it will allow update/insertion/deletion.
+* Added two new metadata blocks: PADDING which has an obvious function, and APPLICATION, which is meant to be open to third party applications. See the [latest format docs](https://xiph.org/flac/format.html#def_APPLICATION) for more info, or the new [id registration page](https://xiph.org/flac/id.html).
+* Added a <span class="argument">-P</span> option to <span class="commandname">flac</span> to reserve a PADDING block when encoding.
+* Added support for 24-bit files to <span class="commandname">flac</span> (the FLAC format always supported it).
+* Started the Winamp3 plugin.
+* Greatly expanded the test suite, adding more streams (24-bit streams, noise streams, non-audio streams, more patterns) and more option combinations to the encoder. The test suite runs about 30 streams and over 5000 encodings now.
+* Fixed a bug in <span class="commandname">libFLAC</span> that happened when using an exhaustive LPC coefficient quantization search with 8 bps input.
+* Fixed a bug in <span class="commandname">libFLAC</span> where the error estimation in the fixed predictor could overflow.
+* Fixed a bug in <span class="commandname">libFLAC</span> where LPC was attempted even when the autocorrelation coefficients implied it wouldn't help.
+* Reworked the LPC coefficient quantizer, which also fixed another bug that might occur in rare cases.
+* Really fixed the '-V overflow' bug (c.f. [bug #231976](http://sourceforge.net/p/flac/bugs/5/)).
+* Fixed a bug in <span class="commandname">flac</span> related to the decode buffer sizing. FLAC is very close to being ready for an official release. The only known problems left are with the Winamp plugins, which should be fixed soon, and pipes with MSVC.
+
+## FLAC 0.7 (12-Feb-2001)  
+
+Changes:
+ 
+* Fixed a bug that happened when both -fr and --seek were used at the same time.
+* Fixed a bug with -p (c.f. [bug #230992](http://sourceforge.net/p/flac/bugs/1/)).
+* Fixed a bug that happened when using large (>32K) blocksizes and -V (c.f. [bug #231976](http://sourceforge.net/p/flac/bugs/5/)).
+* Fixed a bug where encoder was double-closing a file.
+* Expanded the test suite.
+* Added more optimization flags for gcc, which should speed up flac.
+
+## FLAC 0.6 (28-Jan-2001)  
+
+The encoder is now much faster. The -m option has been sped up by 4x and -r improved, meaning that in the default compression mode (-6), encoding should be at least 3 times faster. Other changes:
+
+* Some bugs related to <span class="commandname">flac</span> and pipes were fixed.
+* A "loose mid-side" (<span class="argument">-M</span>) option to the encoder has been added, which adaptively switches between independent and mid-side coding, instead of the exhaustive search that <span class="argument">-m</span> does.
+* An analyze mode (<span class="argument">-a</span>) has been added to <span class="commandname">flac</span>. This is useful mainly for developers; currently it will dump info about each frame and subframe to a file. It's a text file in a format that can be easily processed by scripts; a separate analysis program is in the works.
+* The source now has an autoconf/libtool-based build system. This should allow the source to build "out-of-the-box" on many more platforms.
+
+## FLAC 0.5 (15-Jan-2001)  
+
+This is the first beta version of FLAC. Being beta, there will be no changes to the format that will break older streams, unless a serious bug involving the format is found. What this means is that, barring such a bug, streams created with 0.5 will be decodable by future versions. This version also includes some new features:
+
+* An [MD5 signature](http://userpages.umbc.edu/~mabzug1/cs/md5/md5.html) of the unencoded audio is computed during encoding, and stored in the Encoding metadata block in the stream header. When decoding, <span class="commandname">flac</span> will now compute the MD5 signature of the decoded data and compare it against the signature in the stream header.
+* A test mode (<span class="argument">-t</span>) has been added to <span class="commandname">flac</span>. It works like decode mode but doesn't write an output file.
+
+## FLAC 0.4 (23-Dec-2000)  
+
+This version fixes a bug in the constant subframe detection. More importantly, a verify option (-V) has been added to <span class="commandname">flac</span> that verifies the encoding process. With this option turned on, <span class="commandname">flac</span> will create a parallel decoder while encoding to make sure that the encoded output decodes to exactly match the original input. In this way, any unknown bug in the encoder will be caught and <span class="commandname">flac</span> will abort with an error message.

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
new file mode 100644
index 0000000..4c6ae79
--- /dev/null
+++ b/CONTRIBUTING.md

@@ -0,0 +1,74 @@
+Thanks for considering contributing to the FLAC project!
+
+Contributing to FLAC is possible in many ways. Among them are
+
+- Reporting bugs or other issues at https://github.com/xiph/flac/issues
+- Submitting patches at https://github.com/xiph/flac/pulls
+- Testing FLAC playing devices and software at
+  https://wiki.hydrogenaud.io/index.php?title=FLAC_decoder_testbench
+
+General communication not specific to issues is generally done through
+the FLAC mailing lists:
+
+- For user questions and discussions:
+  https://lists.xiph.org/mailman/listinfo/flac 
+- For developer questions and discussions:
+  https://lists.xiph.org/mailman/listinfo/flac-dev
+
+## Goals
+
+Since FLAC is an open-source project, it's important to have a set of
+goals that everyone works to. They may change slightly from time to time
+but they're a good guideline. Changes should be in line with the goals
+and should not attempt to embrace any of the anti-goals.
+
+**Goals**
+
+- FLAC should be and stay an open format with an open-source reference
+  implementation.
+- FLAC should be lossless. This seems obvious but lossy compression
+  seems to creep into every audio codec. This goal also means that flac
+  should stay archival quality and be truly lossless for all input.
+  Testing of releases should be thorough.
+- FLAC should yield respectable compression, on par with or better than
+  other lossless codecs.
+- FLAC should allow at least realtime decoding on even modest hardware.
+- FLAC should support fast sample-accurate seeking.
+- FLAC should allow gapless playback of consecutive streams. This follows from the lossless goal.
+- The FLAC project owes a lot to the many people who have advanced the
+  audio compression field so freely, and aims also to contribute through
+  the open-source development of new ideas.
+
+**Anti-goals**
+
+- Lossy compression. There are already many suitable lossy formats (Ogg
+  Vorbis, MP3, etc.).
+- Copy prevention, DRM, etc. There is no intention to add any copy
+  prevention methods. Of course, we can't stop someone from encrypting a
+  FLAC stream in another container (e.g. the way Apple encrypts AAC in
+  MP4 with FairPlay); that is the choice of the user.
+
+
+## Contributing patches
+
+Contributions to FLAC should be licensed with the same license as the
+part of the FLAC project the contribution belongs to. These are
+
+- libFLAC and libFLAC++ are licensed under Xiph.org's
+  BSD-like license (see COPYING.Xiph), so contributions to these
+  libraries should also be licensed under this license, otherwise they
+  cannot be accepted
+- the flac and metaflac command line programs are licensed under GPLv2, 
+  see COPYING.GPL
+- the helper libraries for flac and metaflac (which are in src/share)
+  are licensed under varying licenses, see the license preamble for each
+  file to see how they are licensed
+
+Patches can be contributed through GitHub as a Pull Request.
+Alternatively you can supply patches through the mailing list.
+
+## Code style
+
+FLAC does have its own peculiar coding style that does not seem to fit
+general categories. You can use `git clang-format` to have your patch
+auto-formatted similar to the rest of the code.

diff --git a/COPYING.Xiph b/COPYING.Xiph
index c0361fd..edd24f7 100644
--- a/COPYING.Xiph
+++ b/COPYING.Xiph

@@ -1,5 +1,5 @@
 Copyright (C) 2000-2009  Josh Coalson
-Copyright (C) 2011-2014  Xiph.Org Foundation
+Copyright (C) 2011-2022  Xiph.Org Foundation
 
 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
@@ -12,7 +12,7 @@
 notice, this list of conditions and the following disclaimer in the
 documentation and/or other materials provided with the distribution.
 
-- Neither the name of the Xiph.org Foundation nor the names of its
+- Neither the name of the Xiph.Org Foundation nor the names of its
 contributors may be used to endorse or promote products derived from
 this software without specific prior written permission.
 

diff --git a/README b/README
deleted file mode 100644
index 9178882..0000000
--- a/README
+++ /dev/null

@@ -1,254 +0,0 @@
-/* FLAC - Free Lossless Audio Codec
- * Copyright (C) 2001-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
- *
- * This file is part the FLAC project.  FLAC is comprised of several
- * components distributed under different licenses.  The codec libraries
- * are distributed under Xiph.Org's BSD-like license (see the file
- * COPYING.Xiph in this distribution).  All other programs, libraries, and
- * plugins are distributed under the LGPL or GPL (see COPYING.LGPL and
- * COPYING.GPL).  The documentation is distributed under the Gnu FDL (see
- * COPYING.FDL).  Each file in the FLAC distribution contains at the top the
- * terms under which it may be distributed.
- *
- * Since this particular file is relevant to all components of FLAC,
- * it may be distributed under the Xiph.Org license, which is the least
- * restrictive of those mentioned above.  See the file COPYING.Xiph in this
- * distribution.
- */
-
-
-FLAC is an Open Source lossless audio codec developed by Josh Coalson from 2001
-to 2009.
-
-From January 2012 FLAC is being maintained by Erik de Castro Lopo under the
-auspices of the Xiph.org Foundation.
-
-FLAC is comprised of
-  * `libFLAC', a library which implements reference encoders and
-    decoders for native FLAC and Ogg FLAC, and a metadata interface
-  * `libFLAC++', a C++ object wrapper library around libFLAC
-  * `flac', a command-line program for encoding and decoding files
-  * `metaflac', a command-line program for viewing and editing FLAC
-    metadata
-  * player plugin for XMMS
-  * user and API documentation
-
-The libraries (libFLAC, libFLAC++) are
-licensed under Xiph.org's BSD-like license (see COPYING.Xiph).  All other
-programs and plugins are licensed under the GNU General Public License
-(see COPYING.GPL).  The documentation is licensed under the GNU Free
-Documentation License (see COPYING.FDL).
-
-
-===============================================================================
-FLAC - 1.3.1 - Contents
-===============================================================================
-
-- Introduction
-- Prerequisites
-- Note to embedded developers
-- Building in a GNU environment
-- Building with Makefile.lite
-- Building with MSVC
-- Building on Mac OS X
-
-
-===============================================================================
-Introduction
-===============================================================================
-
-This is the source release for the FLAC project.  See
-
-	doc/html/index.html
-
-for full documentation.
-
-A brief description of the directory tree:
-
-	doc/          the HTML documentation
-	examples/     example programs demonstrating the use of libFLAC and libFLAC++
-	include/      public include files for libFLAC and libFLAC++
-	man/          the man pages for `flac' and `metaflac'
-	src/          the source code and private headers
-	test/         the test scripts
-
-If you have questions about building FLAC that this document does not answer,
-please submit them at the following tracker so this document can be improved:
-
-	https://sourceforge.net/p/flac/support-requests/
-
-
-===============================================================================
-Prerequisites
-===============================================================================
-
-To build FLAC with support for Ogg FLAC you must have built and installed
-libogg according to the specific instructions below.  You must have
-libogg 1.1.2 or greater, or there will be seeking problems with Ogg FLAC.
-
-If you are building on x86 and want the assembly optimizations, you will
-need to have NASM >= 0.98.30 installed according to the specific instructions
-below.
-
-
-===============================================================================
-Note to embedded developers
-===============================================================================
-
-libFLAC has grown larger over time as more functionality has been
-included, but much of it may be unnecessary for a particular embedded
-implementation.  Unused parts may be pruned by some simple editing of
-configure.ac and src/libFLAC/Makefile.am; the following dependency
-graph shows which modules may be pruned without breaking things
-further down:
-
-metadata.h
-	stream_decoder.h
-	format.h
-
-stream_encoder.h
-	stream_decoder.h
-	format.h
-
-stream_decoder.h
-	format.h
-
-In other words, for pure decoding applications, both the stream encoder
-and metadata editing interfaces can be safely removed.
-
-There is a section dedicated to embedded use in the libFLAC API
-HTML documentation (see doc/html/api/index.html).
-
-Also, there are several places in the libFLAC code with comments marked
-with "OPT:" where a #define can be changed to enable code that might be
-faster on a specific platform.  Experimenting with these can yield faster
-binaries.
-
-
-===============================================================================
-Building in a GNU environment
-===============================================================================
-
-FLAC uses autoconf and libtool for configuring and building.
-Better documentation for these will be forthcoming, but in
-general, this should work:
-
-./configure && make && make check && make install
-
-The 'make check' step is optional; omit it to skip all the tests,
-which can take several hours and use around 70-80 megs of disk space.
-Even though it will stop with an explicit message on any failure, it
-does print out a lot of stuff so you might want to capture the output
-to a file if you're having a problem.  Also, don't run 'make check'
-as root because it confuses some of the tests.
-
-NOTE: Despite our best efforts it's entirely possible to have
-problems when using older versions of autoconf, automake, or
-libtool.  If you have the latest versions and still can't get it
-to work, see the next section on Makefile.lite.
-
-There are a few FLAC-specific arguments you can give to
-`configure':
-
---enable-debug : Builds everything with debug symbols and some
-extra (and more verbose) error checking.
-
---disable-asm-optimizations : Disables the compilation of the
-assembly routines.  Many routines have assembly versions for
-speed and `configure' is pretty good about knowing what is
-supported, but you can use this option to build only from the
-C sources.  May be necessary for building on OS X (Intel).
-
---enable-sse : If you are building for an x86 CPU that supports
-SSE instructions, you can enable some of the faster routines
-if your operating system also supports SSE instructions.  flac
-can tell if the CPU supports the instructions but currently has
-no way to test if the OS does, so if it does, you must pass
-this argument to configure to use the SSE routines.  If flac
-crashes when built with this option you will have to go back and
-configure without --enable-sse.  Note that
---disable-asm-optimizations implies --disable-sse.
-
---enable-local-xmms-plugin : Installs the FLAC XMMS plugin in
-$HOME/.xmms/Plugins, instead of the global XMMS plugin area
-(usually /usr/lib/xmms/Input).
-
---with-ogg=
---with-xmms-prefix=
---with-libiconv-prefix=
-Use these if you have these packages but configure can't find them.
-
-If you want to build completely from scratch (i.e. starting with just
-configure.ac and Makefile.am) you should be able to just run 'autogen.sh'
-but make sure and read the comments in that file first.
-
-
-===============================================================================
-Building with Makefile.lite
-===============================================================================
-
-There is a more lightweight build system for do-it-yourself-ers.
-It is also useful if configure isn't working, which may be the
-case since lately we've had some problems with different versions
-of automake and libtool.  The Makefile.lite system should work
-on GNU systems with few or no adjustments.
-
-From the top level just 'make -f Makefile.lite'.  You can
-specify zero or one optional target from 'release', 'debug',
-'test', or 'clean'.  The default is 'release'.  There is no
-'install' target but everything you need will end up in the
-obj/ directory.
-
-If you are not on an x86 system or you don't have nasm, you
-may have to change the DEFINES in src/libFLAC/Makefile.lite.  If
-you don't have nasm, remove -DFLAC__HAS_NASM.  If your target is
-not an x86, change -DFLAC__CPU_IA32 to -DFLAC__CPU_UNKNOWN.
-
-
-===============================================================================
-Building with MSVC
-===============================================================================
-
-There are .vcproj projects and a master FLAC.sln solution to build all
-the libraries and executables with MSVC 2005 or newer.
-
-Prerequisite: you must have the Ogg libraries installed as described
-later.
-
-Prerequisite: you must have nasm installed, and nasm.exe must be in
-your PATH, or the path to nasm.exe must be added to the list of
-directories for executable files in the MSVC global options.
-
-To build everything, run Visual Studio, do File|Open and open FLAC.sln.
-From the dropdown in the toolbar, select "Release" instead of "Debug",
-then do Build|Build Solution.
-
-This will build all libraries both statically (e.g.
-objs\release\lib\libFLAC_static.lib) and as DLLs (e.g.
-objs\release\lib\libFLAC.dll), and it will build all binaries, statically
-linked (e.g. objs\release\bin\flac.exe).
-
-Everything will end up in the "objs" directory.  DLLs and .exe files
-are all that are needed and can be copied to an installation area and
-added to the PATH.
-
-By default the code is configured with Ogg support. Before building FLAC
-you will need to get the Ogg source distribution
-(see http://xiph.org/downloads/), build libogg_static.lib (load
-win32\libogg_static.sln, change solution configuration to "Release" and
-code generation to "Multi-threaded (/MT)", then build), copy libogg_static.lib
-into FLAC's 'objs\release\lib' directory, and copy the entire include\ogg tree
-into FLAC's 'include' directory (so that there is an 'ogg' directory in FLAC's
-'include' directory with the files ogg.h, os_types.h and config_types.h).
-
-If you want to build without Ogg support, instead edit all .vcproj files
-and remove any "FLAC__HAS_OGG" definitions.
-
-
-===============================================================================
-Building on Mac OS X
-===============================================================================
-
-If you have Fink or a recent version of OS X with the proper autotools,
-the GNU flow above should work.

diff --git a/README.chromium b/README.chromium
index 9e2ee71..9717c3e 100644
--- a/README.chromium
+++ b/README.chromium

@@ -1,27 +1,13 @@
 Name: flac
-URL: http://downloads.xiph.org/releases/flac/flac-1.3.1.tar.xz
-Version: 1.3.1
+URL: https://github.com/xiph/flac/archive/refs/tags/1.4.2.zip
+Version: 1.4.2
 License: BSD
 License File: COPYING.Xiph
 Security Critical: yes
 
 Description:
-This contains a copy of flac-1.3.1
+This is a straight dump of flac-1.4.2 with all the unused files removed.
 
 This library is required for the browser to compress and encode recorded audio
-before sending to Google servers for speech recognition. This is a straight dump
-of flac-1.3.1 with all the unused files removed and the following changes:
-
-- Added flac.gyp, flac.h, README.chromium and src/libFLAC/alloc.c
-- Replaced include/share/alloc.h with a new file.
-- include/libFLAC/private/macros.h: don't conditionalize on GCC version >= 4.3;
-  Clang looks like GCC 4.2, but we want these defines.
-- src/share/win_utf8_io/win_utf8_io.c: call LoadLibraryA instead of just
-  LoadLibrary since the argument is a non-wide string.
-- src/share/libFLAC/bitwriter.c: add extern declaration of inline
-  function FLAC__bitwriter_write_raw_uint32 to fix link errors. Upstream:
-  https://git.xiph.org/?p=flac.git;a=commit;h=28817ba52fdd92b0a49f84b2e8848199efc00242
-- src/libFLAC/lpc.c: Restore missing conditional. Upstream:
-  https://git.xiph.org/?p=flac.git;a=commit;h=ef9f7998fd358b2812a7296fe1c5aed221bade5f
-- Include limits.h on Android to work around NDK deficiency. Upstream:
-  https://git.xiph.org/?p=flac.git;a=commit;h=b762a20ace7c7771f87f63478bcee3cf51268cff
+before sending to Google servers for speech recognition. It is also used to
+play system sounds.

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..89029e1
--- /dev/null
+++ b/README.md

@@ -0,0 +1,278 @@
+<!---
+/* FLAC - Free Lossless Audio Codec
+ * Copyright (C) 2001-2009  Josh Coalson
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
+ *
+ * This file is part the FLAC project.  FLAC is comprised of several
+ * components distributed under different licenses.  The codec libraries
+ * are distributed under Xiph.Org's BSD-like license (see the file
+ * COPYING.Xiph in this distribution).  All other programs, libraries, and
+ * plugins are distributed under the LGPL or GPL (see COPYING.LGPL and
+ * COPYING.GPL).  The documentation is distributed under the Gnu FDL (see
+ * COPYING.FDL).  Each file in the FLAC distribution contains at the top the
+ * terms under which it may be distributed.
+ *
+ * Since this particular file is relevant to all components of FLAC,
+ * it may be distributed under the Xiph.Org license, which is the least
+ * restrictive of those mentioned above.  See the file COPYING.Xiph in this
+ * distribution.
+ */
+--->
+
+# Free Lossless Audio Codec (FLAC)
+
+FLAC is open source software that can reduce the amount of storage space
+needed to store digital audio signals without needing to remove
+information in doing so.
+
+The files read and produced by this software are called FLAC files. As
+these files (which follow the [FLAC format](https://xiph.org/flac/format.html))
+can be read from and written to by other software as well, this software
+is often referred to as the FLAC reference implementation.
+
+FLAC has been developed by volunteers. If you want to help out, see
+CONTRIBUTING.md for more information.
+
+## Components
+
+FLAC is comprised of
+  * libFLAC, a library which implements reference encoders and
+    decoders for native FLAC and Ogg FLAC, and a metadata interface
+  * libFLAC++, a C++ object wrapper library around libFLAC
+  * `flac`, a command-line program for encoding and decoding files
+  * `metaflac`, a command-line program for viewing and editing FLAC
+    metadata
+  * user and API documentation
+
+The libraries (libFLAC, libFLAC++) are licensed under Xiph.org's
+BSD-like license (see COPYING.Xiph). All other programs and plugins are
+licensed under the GNU General Public License (see COPYING.GPL). The
+documentation is licensed under the GNU Free Documentation License
+(see COPYING.FDL).
+
+## Documentation
+
+For documentation of the `flac` and `metaflac` command line tools, see
+the directory man, which contains the files flac.md and metaflac.md
+
+The API documentation is in html and is generated by Doxygen. It can be
+found in the directory doc/html/api. It is included in a release tarball
+and must be built with Doxygen when the source is taken directly from
+git.
+
+The directory examples contains example source code on using libFLAC and
+libFLAC++.
+
+Documentation concerning the FLAC format itself (which can be used to
+create software reading and writing FLAC files independent from
+libFLAC) was included in previous releases, but can now be found on
+https://datatracker.ietf.org/doc/draft-ietf-cellar-flac/ Additionally
+a set of files for conformance testing called the FLAC decoder testbench
+can be found at https://github.com/ietf-wg-cellar/flac-test-files
+
+If you have questions about FLAC that this document does not answer,
+please submit them at the following tracker so this document can be
+improved:
+
+https://github.com/xiph/flac/issues
+
+## Building FLAC
+
+All components of the FLAC project can be built with a variety of
+compilers (including GCC, Clang, Visual Studio, Intel C++ Compiler) on
+many architectures (including x86, x86_64, ARMv7, ARMv8 and PowerPC)
+for many different operating systems.
+
+To do this, FLAC provides two build systems: one using GNU's autotools
+and one with CMake. Both differ slightly in configuration options, but
+should be considered equivalent for most use cases.
+
+FLAC used to provide files specifically for building with Visual Studio,
+but these have been removed in favor of using CMake.
+
+## Building with CMake
+
+CMake is a cross-platform build system. FLAC can be built on Windows,
+Linux, Mac OS X using CMake.
+
+You can use either CMake's CLI or GUI. We recommend you to have a
+separate build folder outside the repository in order to not spoil it
+with generated files. It is possible however to do a so-called in-tree
+build, in that case /path/to/flac-build in the following examples is
+equal to /path/to/flac-source.
+
+### CMake CLI
+
+Go to your build folder and run something like this:
+
+```
+/path/to/flac-build$ cmake /path/to/flac-source
+```
+
+or e.g. in Windows shell
+
+```
+C:\path\to\flac-build> cmake \path\to\flac-source
+```
+
+(provided that cmake is in your %PATH% variable)
+
+That will generate build scripts for the default build system (e.g.
+Makefiles for UNIX). After that you start the build with a command like
+this:
+
+```
+/path/to/flac-build$ make
+```
+
+And afterwards you can run tests or install the built libraries and
+headers
+
+```
+/path/to/flac-build$ make test
+/path/to/flac-build$ make install
+```
+
+If you want to use a build system other than the default, add the -G flag to cmake,
+e.g.:
+
+```
+/path/to/flac-build$ cmake /path/to/flac-source -GNinja
+/path/to/flac-build$ ninja
+```
+
+or:
+
+```
+/path/to/flac-build$ cmake /path/to/flac-source -GXcode
+```
+
+Use cmake --help to see the list of available generators.
+
+By default CMake will search for OGG. If CMake fails to find it you can
+help CMake by specifying the exact path:
+
+```
+/path/to/flac-build$ cmake /path/to/flac-source -DOGG_ROOT=/path/to/ogg
+```
+
+If you would like CMake to build OGG alongside FLAC, you can place the
+ogg sources directly in the flac source directory as a subdirectory with
+the name ogg, for example:
+
+```
+/path/to/flac-source/ogg
+```
+
+If you don't want to build flac with OGG support you can tell CMake not
+to look for OGG:
+
+```
+/path/to/flac-build$ cmake /path/to/flac-source -DWITH_OGG=OFF
+```
+
+Other FLAC options (e.g. building the C++ lib or docs) can also be passed to
+cmake through the -D flag. If you want to know what options are available,
+use -LH:
+
+```
+/path/to/flac-build$ cmake /path/to/flac-source -LH
+```
+
+### CMake GUI (for Visual Studio)
+It is likely that you would prefer to use the CMake GUI if you use
+Visual Studio to build FLAC. It's in essence the same process as
+building using CLI.
+
+Open cmake-gui. In the window select a source directory (the
+repository's root), a build directory (some other directory outside the
+repository). Then press button "Configure". CMake will ask you which
+build system you prefer. Choose that version of Visual Studio which you
+have on your system, choose whether you want to build for Win32 or x64.
+Press OK.
+
+After CMake finishes you can change the configuration to your liking and
+if you change anything, run Configure again. With the "Generate" button,
+CMake creates Visual Studio files, which can be opened from Visual
+Studio. With the button "Open Project" CMake will launch Visual Studio
+and open the generated solution. You can use the project files as usual
+but remember that they were generated by CMake. That means that your
+changes (e.g. some additional compile flags) will be lost when you run
+CMake next time.
+
+CMake searches by default for OGG on your system and returns an error
+if it cannot find it. If you want to build OGG alongside FLAC, you can
+download the OGG sources and extract them in a subdirectory of the FLAC
+source directory with the name ogg (i.e. /path/to/flac-source/ogg)
+before running CMake. If you don't want to build FLAC with OGG support,
+untick the box following WITH_OGG flag in the list of variables in
+cmake-gui window and run "Configure" again.
+
+If CMake fails to find MSVC compiler then running cmake-gui from MS
+Developer command prompt should help.
+
+## Building with GNU autotools
+
+FLAC uses autoconf and libtool for configuring and building. To
+configure a build, open a command line/terminal and run `./configure`.
+You can provide options to this command, which are listed by running
+`./configure --help`.
+
+In case the configure script is not present (for example when building
+from git and not from a release tarball), it can be generated by running
+`./autogen.sh`. This may require a libtool development package though.
+
+After configuration, build with `make`, verify the build with
+`make check` and install with `make install`. Installation might require
+administrator privileges, i.e. `sudo make install`.
+
+The 'make check' step is optional; omit it to skip all the tests, which
+can take about an hour to complete. Even though it will stop with an
+explicit message on any failure, it does print out a lot of stuff so you
+might want to capture the output to a file if you're having a problem.
+Also, don't run 'make check' as root because it confuses some of the
+tests.
+
+Summarizing:
+
+```
+./configure
+make && make check
+sudo make install
+```
+
+## Note to embedded developers
+
+libFLAC has grown larger over time as more functionality has been
+included, but much of it may be unnecessary for a particular embedded
+implementation.  Unused parts may be pruned by some simple editing of
+configure.ac and src/libFLAC/Makefile.am; the following dependency
+graph shows which modules may be pruned without breaking things
+further down:
+
+```
+metadata.h
+    stream_decoder.h
+    format.h
+
+stream_encoder.h
+    stream_decoder.h
+    format.h
+
+stream_decoder.h
+    format.h
+```
+
+In other words, for pure decoding applications, both the stream encoder
+and metadata editing interfaces can be safely removed. Note that this
+is specific to building the libraries for embedded use. The command line
+tools do not provide such compartmentalization, and require a complete
+libFLAC build to function.
+
+There is a section dedicated to embedded use in the libFLAC API
+HTML documentation (see doc/html/api/index.html).
+
+Also, there are several places in the libFLAC code with comments marked
+with "OPT:" where a #define can be changed to enable code that might be
+faster on a specific platform.  Experimenting with these can yield
+faster binaries.

diff --git a/codereview.settings b/codereview.settings
index 13904f6..bf79ca3 100644
--- a/codereview.settings
+++ b/codereview.settings

@@ -1,7 +1,3 @@
 # This file is used by git-cl to get repository specific information.
-CC_LIST: chromium-reviews@chromium.org
-CODE_REVIEW_SERVER: codereview.chromium.org
 GERRIT_HOST: True
 PROJECT: flac
-STATUS: http://chromium-status.appspot.com/status
-VIEW_VC: https://chromium.googlesource.com/chromium/deps/flac/+/

diff --git a/flac.gyp b/flac.gyp
deleted file mode 100644
index cea6e98..0000000
--- a/flac.gyp
+++ /dev/null

@@ -1,119 +0,0 @@
-# Copyright (c) 2011 The Chromium Authors. All rights reserved.
-# Use of this source code is governed by a BSD-style license that can be
-# found in the LICENSE file.
-
-{
-  'targets': [
-    {
-      'target_name': 'libflac',
-      'product_name': 'flac',
-      'type': 'static_library',
-      'sources': [
-        'include/FLAC/all.h',
-        'include/FLAC/assert.h',
-        'include/FLAC/callback.h',
-        'include/FLAC/export.h',
-        'include/FLAC/format.h',
-        'include/FLAC/metadata.h',
-        'include/FLAC/ordinals.h',
-        'include/FLAC/stream_decoder.h',
-        'include/FLAC/stream_encoder.h',
-        'include/share/alloc.h',
-        'include/share/compat.h',
-        'include/share/endswap.h',
-        'include/share/private.h',
-        'src/libFLAC/alloc.c',
-        'src/libFLAC/bitmath.c',
-        'src/libFLAC/bitreader.c',
-        'src/libFLAC/bitwriter.c',
-        'src/libFLAC/cpu.c',
-        'src/libFLAC/crc.c',
-        'src/libFLAC/fixed.c',
-        'src/libFLAC/float.c',
-        'src/libFLAC/format.c',
-        'src/libFLAC/lpc.c',
-        'src/libFLAC/md5.c',
-        'src/libFLAC/memory.c',
-        'src/libFLAC/stream_decoder.c',
-        'src/libFLAC/stream_encoder.c',
-        'src/libFLAC/stream_encoder_framing.c',
-        'src/libFLAC/window.c',
-        'src/libFLAC/include/private/all.h',
-        'src/libFLAC/include/private/bitmath.h',
-        'src/libFLAC/include/private/bitreader.h',
-        'src/libFLAC/include/private/bitwriter.h',
-        'src/libFLAC/include/private/cpu.h',
-        'src/libFLAC/include/private/crc.h',
-        'src/libFLAC/include/private/fixed.h',
-        'src/libFLAC/include/private/float.h',
-        'src/libFLAC/include/private/format.h',
-        'src/libFLAC/include/private/lpc.h',
-        'src/libFLAC/include/private/macros.h',
-        'src/libFLAC/include/private/md5.h',
-        'src/libFLAC/include/private/memory.h',
-        'src/libFLAC/include/private/metadata.h',
-        'src/libFLAC/include/private/stream_encoder.h',
-        'src/libFLAC/include/private/stream_encoder_framing.h',
-        'src/libFLAC/include/private/window.h',
-        'src/libFLAC/include/protected/all.h',
-        'src/libFLAC/include/protected/stream_decoder.h',
-        'src/libFLAC/include/protected/stream_encoder.h',
-      ],
-      'defines': [
-        'FLAC__NO_DLL',
-        'FLAC__OVERFLOW_DETECT',
-        'VERSION="1.3.1"',
-        'HAVE_LROUND',
-      ],
-      'conditions': [
-        ['OS=="win"', {
-          'sources': [
-            'include/share/win_utf8_io.h',
-            'src/share/win_utf8_io/win_utf8_io.c',
-          ],
-          'defines!': [
-            'WIN32_LEAN_AND_MEAN',  # win_utf8_io.c defines this itself.
-          ],
-          'msvs_settings': {
-            'VCCLCompilerTool': {
-              'AdditionalOptions': [
-                '/wd4334',  # 32-bit shift converted to 64 bits.
-                '/wd4267',  # Converting from size_t to unsigned on 64-bit.
-              ],
-            },
-          },
-        }, {
-          'defines': [
-            'HAVE_INTTYPES_H',
-          ],
-        }],
-      ],
-      'include_dirs': [
-        'include',
-        'src/libFLAC/include',
-      ],
-      'direct_dependent_settings': {
-        'defines': [
-          'FLAC__NO_DLL',
-        ],
-      },
-      'variables': {
-        'clang_warning_flags': [
-          # libflac converts between FLAC__StreamDecoderState and
-          # FLAC__StreamDecoderInitStatus a lot in stream_decoder.c.
-          '-Wno-conversion',
-          # libflac contains constants that are only used in certain
-          # compile-time cases, which triggers unused-const-variable warnings in
-          # other cases.
-          '-Wno-unused-const-variable',
-        ],
-      },
-    },
-  ],
-}
-
-# Local Variables:
-# tab-width:2
-# indent-tabs-mode:nil
-# End:
-# vim: set expandtab tabstop=2 shiftwidth=2:

diff --git a/include/FLAC/all.h b/include/FLAC/all.h
index 2851cf5..cc1c480 100644
--- a/include/FLAC/all.h
+++ b/include/FLAC/all.h

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -52,7 +52,7 @@
  * level idea of the structure and how to find the information you
  * need.  As a prerequisite you should have at least a basic
  * knowledge of the FLAC format, documented
- * <A HREF="../format.html">here</A>.
+ * <A HREF="https://xiph.org/flac/format.html">here</A>.
  *
  * \section c_api FLAC C API
  *
@@ -64,8 +64,8 @@
  *
  * By writing a little code and linking against libFLAC, it is
  * relatively easy to add FLAC support to another program.  The
- * library is licensed under <A HREF="../license.html">Xiph's BSD license</A>.
- * Complete source code of libFLAC as well as the command-line
+ * library is licensed under <A HREF="https://xiph.org/flac/license.html">Xiph's
+ * BSD license</A>. Complete source code of libFLAC as well as the command-line
  * encoder and plugins is available and is a useful source of
  * examples.
  *
@@ -97,7 +97,7 @@
  * example /usr/include/FLAC++/...).
  *
  * libFLAC++ is also licensed under
- * <A HREF="../license.html">Xiph's BSD license</A>.
+ * <A HREF="https://xiph.org/flac/license.html">Xiph's BSD license</A>.
  *
  * \section getting_started Getting Started
  *
@@ -113,7 +113,8 @@
  * functions through the links in top bar across this page.
  *
  * If you prefer a more hands-on approach, you can jump right to some
- * <A HREF="../documentation_example_code.html">example code</A>.
+ * <A HREF="https://xiph.org/flac/documentation_example_code.html">example
+ * code</A>.
  *
  * \section porting_guide Porting Guide
  *
@@ -147,7 +148,7 @@
  * library.
  *
  * Also, there are several places in the libFLAC code with comments marked
- * with "OPT:" where a #define can be changed to enable code that might be
+ * with "OPT:" where a \#define can be changed to enable code that might be
  * faster on a specific platform.  Experimenting with these can yield faster
  * binaries.
  */
@@ -159,11 +160,12 @@
  * the libraries to newer versions of FLAC.
  *
  * One simple facility for making porting easier that has been added
- * in FLAC 1.1.3 is a set of \c #defines in \c export.h of each
+ * in FLAC 1.1.3 is a set of \#defines in \c export.h of each
  * library's includes (e.g. \c include/FLAC/export.h).  The
- * \c #defines mirror the libraries'
- * <A HREF="http://www.gnu.org/software/libtool/manual/libtool.html#Libtool-versioning">libtool version numbers</A>,
- * e.g. in libFLAC there are \c FLAC_API_VERSION_CURRENT,
+ * \#defines mirror the libraries'
+ * <A
+ * HREF="http://www.gnu.org/software/libtool/manual/libtool.html#Libtool-versioning">libtool
+ * version numbers</A>, e.g. in libFLAC there are \c FLAC_API_VERSION_CURRENT,
  * \c FLAC_API_VERSION_REVISION, and \c FLAC_API_VERSION_AGE.
  * These can be used to support multiple versions of an API during the
  * transition phase, e.g.
@@ -176,7 +178,7 @@
  * #endif
  * \endcode
  *
- * The the source will work for multiple versions and the legacy code can
+ * The source will work for multiple versions and the legacy code can
  * easily be removed when the transition is complete.
  *
  * Another available symbol is FLAC_API_SUPPORTS_OGG_FLAC (defined in
@@ -241,14 +243,16 @@
  * FLAC__seekable_stream_decoder_set_read_callback(decoder, my_read_callback);
  * FLAC__seekable_stream_decoder_set_seek_callback(decoder, my_seek_callback);
  * FLAC__seekable_stream_decoder_set_tell_callback(decoder, my_tell_callback);
- * FLAC__seekable_stream_decoder_set_length_callback(decoder, my_length_callback);
- * FLAC__seekable_stream_decoder_set_eof_callback(decoder, my_eof_callback);
- * FLAC__seekable_stream_decoder_set_write_callback(decoder, my_write_callback);
- * FLAC__seekable_stream_decoder_set_metadata_callback(decoder, my_metadata_callback);
+ * FLAC__seekable_stream_decoder_set_length_callback(decoder,
+ * my_length_callback); FLAC__seekable_stream_decoder_set_eof_callback(decoder,
+ * my_eof_callback); FLAC__seekable_stream_decoder_set_write_callback(decoder,
+ * my_write_callback);
+ * FLAC__seekable_stream_decoder_set_metadata_callback(decoder,
+ * my_metadata_callback);
  * FLAC__seekable_stream_decoder_set_error_callback(decoder, my_error_callback);
  * FLAC__seekable_stream_decoder_set_client_data(decoder, my_client_data);
- * if(FLAC__seekable_stream_decoder_init(decoder) != FLAC__SEEKABLE_STREAM_DECODER_OK) do_something;
- * \endcode
+ * if(FLAC__seekable_stream_decoder_init(decoder) !=
+ * FLAC__SEEKABLE_STREAM_DECODER_OK) do_something; \endcode
  *
  * In FLAC 1.1.3 it is like this:
  *
@@ -321,7 +325,7 @@
  *
  * The \a bytes parameter to FLAC__StreamDecoderReadCallback,
  * FLAC__StreamEncoderReadCallback, and FLAC__StreamEncoderWriteCallback
- * is now \c size_t instead of \c unsigned.
+ * is now \c size_t instead of \c uint32_t.
  */
 
 /** \defgroup porting_1_1_3_to_1_1_4 Porting from FLAC 1.1.3 to 1.1.4
@@ -357,6 +361,85 @@
  * \c FLAC__FRAME_HEADER_BLOCKING_STRATEGY_LEN
  */
 
+/** \defgroup porting_1_3_4_to_1_4_0 Porting from FLAC 1.3.4 to 1.4.0
+ *  \ingroup porting
+ *
+ *  \brief
+ *  This module describes porting from FLAC 1.3.4 to FLAC 1.4.0.
+ *
+ * \section porting_1_3_4_to_1_4_0_summary Summary
+ *
+ * Between FLAC 1.3.4 and FLAC 1.4.0, there have been four breaking changes:
+ * - the function get_client_data_from_decoder has been renamed to
+ *   FLAC__stream_decoder_get_client_data
+ * - some data types in the FLAC__Frame struct have changed
+ * - all functions resizing metadata blocks now return the object
+ *   untouched if memory allocation fails, whereas previously the
+ *   handling varied and was more or less undefined
+ * - all functions accepting a filename now take UTF-8 encoded filenames
+ *   on Windows instead of filenames in the current codepage
+ *
+ * Furthermore, there have been the following additions
+ * - the functions FLAC__stream_encoder_set_limit_min_bitrate,
+ *   FLAC__stream_encoder_get_limit_min_bitrate,
+ *   FLAC::encoder::file::set_limit_min_bitrate() and
+ *   FLAC::encoder::file::get_limit_min_bitrate() have been added
+ * - Added FLAC__STREAM_DECODER_ERROR_STATUS_BAD_METADATA to the
+ *   FLAC__StreamDecoderErrorStatus enum
+ *
+ * \section porting_1_3_4_to_1_4_0_breaking Breaking changes
+ *
+ * The function \b get_client_data_from_decoder was added in FLAC 1.3.3
+ * but did not follow the API naming convention and was not properly
+ * exported. The function is now renamed and properly integrated as
+ * FLAC__stream_decoder_get_client_data
+ *
+ * To accommodate encoding and decoding 32-bit int PCM, some data types
+ * in the \b FLAC__Frame struct were changed. Specifically, warmup
+ * in both the FLAC__Subframe_Fixed struct and the FLAC__Subframe_LPC
+ * struct is changed from FLAC__int32 to FLAC__int64. Also, value
+ * in the FLAC__Subframe_Constant is changed from FLAC__int32 to
+ * FLAC__int64. Finally, in FLAC__Subframe_Verbatim struct data is
+ * changed from a FLAC__int32 array to a union containing a FLAC__int32
+ * array and a FLAC__int64 array. Also, a new member is added,
+ * data_type, which clarifies whether the FLAC__int32 or FLAC__int64
+ * array is in use.
+ *
+ * Furthermore, the following functions now return the object untouched
+ * if memory allocation fails, whereas previously the handling varied
+ * and was more or less undefined
+ *
+ * - FLAC__metadata_object_seektable_resize_points
+ * - FLAC__metadata_object_vorbiscomment_resize_comments
+ * - FLAC__metadata_object_cuesheet_track_resize_indices
+ * - FLAC__metadata_object_cuesheet_resize_tracks
+ *
+ * The last breaking change is that all API functions taking a filename
+ * as an argument now, on Windows, must be supplied with that filename
+ * in the UTF-8 character encoding instead of using the current code
+ * page. libFLAC internally translates these UTF-8 encoded filenames to
+ * an appropriate representation to use with _wfopen. On all other
+ * systems, filename is passed to fopen without any translation, as it
+ * was in libFLAC 1.3.4 and earlier.
+ *
+ * \section porting_1_3_4_to_1_4_0_additions Additions
+ *
+ * To aid in creating properly streamable FLAC files, a set of functions
+ * was added to make it possible to enforce a minimum bitrate to files
+ * created through libFLAC's stream_encoder.h interface. With this
+ * function enabled the resulting FLAC files have a minimum bitrate of
+ * 1bit/sample independent of the number of channels, i.e. 48kbit/s for
+ * 48kHz. This can be beneficial for streaming, as very low bitrates for
+ * silent sections compressed with 'constant' subframes can result in a
+ * bitrate of 1kbit/s, creating problems with clients that aren't aware
+ * of this possibility and buffer too much data.
+ *
+ * Finally, FLAC__STREAM_DECODER_ERROR_STATUS_BAD_METADATA was added to
+ * the FLAC__StreamDecoderErrorStatus enum to signal that the decoder
+ * encountered unreadable metadata.
+ *
+ */
+
 /** \defgroup flac FLAC C API
  *
  * The FLAC C API is the interface to libFLAC, a set of structures

diff --git a/include/FLAC/assert.h b/include/FLAC/assert.h
index dc9bcef..aee23c8 100644
--- a/include/FLAC/assert.h
+++ b/include/FLAC/assert.h

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2001-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -34,7 +34,7 @@
 #define FLAC__ASSERT_H
 
 /* we need this since some compilers (like MSVC) leave assert()s on release code (and we don't want to use their ASSERT) */
-#ifdef DEBUG
+#ifndef NDEBUG
 #include <assert.h>
 #define FLAC__ASSERT(x) assert(x)
 #define FLAC__ASSERT_DECLARATION(x) x

diff --git a/include/FLAC/callback.h b/include/FLAC/callback.h
index ce8787f..f323687 100644
--- a/include/FLAC/callback.h
+++ b/include/FLAC/callback.h

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2004-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -165,15 +165,15 @@
  *  required may be set to NULL.
  *
  *  If the seek requirement for an interface is optional, you can signify that
- *  a data sorce is not seekable by setting the \a seek field to \c NULL.
+ *  a data source is not seekable by setting the \a seek field to \c NULL.
  */
 typedef struct {
-	FLAC__IOCallback_Read read;
-	FLAC__IOCallback_Write write;
-	FLAC__IOCallback_Seek seek;
-	FLAC__IOCallback_Tell tell;
-	FLAC__IOCallback_Eof eof;
-	FLAC__IOCallback_Close close;
+  FLAC__IOCallback_Read read;   /**< See FLAC__IOCallbacks */
+  FLAC__IOCallback_Write write; /**< See FLAC__IOCallbacks */
+  FLAC__IOCallback_Seek seek;   /**< See FLAC__IOCallbacks */
+  FLAC__IOCallback_Tell tell;   /**< See FLAC__IOCallbacks */
+  FLAC__IOCallback_Eof eof;     /**< See FLAC__IOCallbacks */
+  FLAC__IOCallback_Close close; /**< See FLAC__IOCallbacks */
 } FLAC__IOCallbacks;
 
 /* \} */

diff --git a/include/FLAC/export.h b/include/FLAC/export.h
index 9cc9e13..a21aac3 100644
--- a/include/FLAC/export.h
+++ b/include/FLAC/export.h

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -36,7 +36,7 @@
 /** \file include/FLAC/export.h
  *
  *  \brief
- *  This module contains #defines and symbols for exporting function
+ *  This module contains \#defines and symbols for exporting function
  *  calls, and providing version information and compiled-in features.
  *
  *  See the \link flac_export export \endlink module.
@@ -46,25 +46,43 @@
  *  \ingroup flac
  *
  *  \brief
- *  This module contains #defines and symbols for exporting function
+ *  This module contains \#defines and symbols for exporting function
  *  calls, and providing version information and compiled-in features.
  *
- *  If you are compiling with MSVC and will link to the static library
- *  (libFLAC.lib) you should define FLAC__NO_DLL in your project to
- *  make sure the symbols are exported properly.
+ *  If you are compiling for Windows (with Visual Studio or MinGW for
+ *  example) and will link to the static library (libFLAC.lib) you
+ *  should define FLAC__NO_DLL in your project to make sure the symbols
+ *  are exported properly.
  *
  * \{
  */
 
-#if defined(FLAC__NO_DLL)
-#define FLAC_API
+/** This \#define is used internally in libFLAC and its headers to make
+ * sure the correct symbols are exported when working with shared
+ * libraries. On Windows, this \#define is set to __declspec(dllexport)
+ * when compiling libFLAC into a library and to __declspec(dllimport)
+ * when the headers are used to link to that DLL. On non-Windows systems
+ * it is used to set symbol visibility.
+ *
+ * Because of this, the define FLAC__NO_DLL must be defined when linking
+ * to libFLAC statically or linking will fail.
+ */
+/* This has grown quite complicated. FLAC__NO_DLL is used by MSVC sln
+ * files and CMake, which build either static or shared. Autotools can
+ * build static, shared or **both**. Therefore, DLL_EXPORT, which is set
+ * by libtool, must override FLAC__NO_DLL when building shared components.
+ */
+#if defined(_WIN32)
 
-#elif defined(_MSC_VER)
+#if defined(FLAC__NO_DLL) && !(defined(DLL_EXPORT))
+#define FLAC_API
+#else
 #ifdef FLAC_API_EXPORTS
 #define	FLAC_API __declspec(dllexport)
 #else
 #define FLAC_API __declspec(dllimport)
 #endif
+#endif
 
 #elif defined(FLAC__USE_VISIBILITY_ATTR)
 #define FLAC_API __attribute__ ((visibility ("default")))
@@ -74,12 +92,12 @@
 
 #endif
 
-/** These #defines will mirror the libtool-based library version number, see
+/** These \#defines will mirror the libtool-based library version number, see
  * http://www.gnu.org/software/libtool/manual/libtool.html#Libtool-versioning
  */
-#define FLAC_API_VERSION_CURRENT 11
+#define FLAC_API_VERSION_CURRENT 12
 #define FLAC_API_VERSION_REVISION 0 /**< see above */
-#define FLAC_API_VERSION_AGE 3 /**< see above */
+#define FLAC_API_VERSION_AGE 0      /**< see above */
 
 #ifdef __cplusplus
 extern "C" {

diff --git a/include/FLAC/format.h b/include/FLAC/format.h
index 7424565..2ec2fa7 100644
--- a/include/FLAC/format.h
+++ b/include/FLAC/format.h

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -60,9 +60,9 @@
  *  structures used by the rest of the interfaces.
  *
  *  First, you should be familiar with the
- *  <A HREF="../format.html">FLAC format</A>.  Many of the values here
- *  follow directly from the specification.  As a user of libFLAC, the
- *  interesting parts really are the structures that describe the frame
+ *  <A HREF="https://xiph.org/flac/format.html">FLAC format</A>.  Many of the
+ * values here follow directly from the specification.  As a user of libFLAC,
+ * the interesting parts really are the structures that describe the frame
  *  header and metadata blocks.
  *
  *  The format structures here are very primitive, designed to store
@@ -83,7 +83,6 @@
  * \{
  */
 
-
 /*
 	Most of the values described in this file are defined by the FLAC
 	format specification.  There is nothing to tune here.
@@ -113,19 +112,16 @@
 
 /** The maximum sample resolution permitted by libFLAC.
  *
- * \warning
  * FLAC__MAX_BITS_PER_SAMPLE is the limit of the FLAC format.  However,
- * the reference encoder/decoder is currently limited to 24 bits because
- * of prevalent 32-bit math, so make sure and use this value when
- * appropriate.
+ * the reference encoder/decoder used to be limited to 24 bits. This
+ * value was used to signal that limit.
  */
-#define FLAC__REFERENCE_CODEC_MAX_BITS_PER_SAMPLE (24u)
+#define FLAC__REFERENCE_CODEC_MAX_BITS_PER_SAMPLE (32u)
 
 /** The maximum sample rate permitted by the format.  The value is
- *  ((2 ^ 16) - 1) * 10; see <A HREF="../format.html">FLAC format</A>
- *  as to why.
+ *  ((2 ^ 20) - 1)
  */
-#define FLAC__MAX_SAMPLE_RATE (655350u)
+#define FLAC__MAX_SAMPLE_RATE (1048575u)
 
 /** The maximum LPC order permitted by the format. */
 #define FLAC__MAX_LPC_ORDER (32u)
@@ -173,10 +169,10 @@
 /** The 32-bit integer big-endian representation of the beginning of
  *  a FLAC stream.
  */
-extern FLAC_API const unsigned FLAC__STREAM_SYNC; /* = 0x664C6143 */
+extern FLAC_API const uint32_t FLAC__STREAM_SYNC; /* = 0x664C6143 */
 
 /** The length of the FLAC signature in bits. */
-extern FLAC_API const unsigned FLAC__STREAM_SYNC_LEN; /* = 32 bits */
+extern FLAC_API const uint32_t FLAC__STREAM_SYNC_LEN; /* = 32 bits */
 
 /** The length of the FLAC signature in bytes. */
 #define FLAC__STREAM_SYNC_LENGTH (4u)
@@ -212,45 +208,54 @@
 /** Contents of a Rice partitioned residual
  */
 typedef struct {
+  uint32_t* parameters;
+  /**< The Rice parameters for each context. */
 
-	unsigned *parameters;
-	/**< The Rice parameters for each context. */
+  uint32_t* raw_bits;
+  /**< Widths for escape-coded partitions.  Will be non-zero for escaped
+   * partitions and zero for unescaped partitions.
+   */
 
-	unsigned *raw_bits;
-	/**< Widths for escape-coded partitions.  Will be non-zero for escaped
-	 * partitions and zero for unescaped partitions.
-	 */
-
-	unsigned capacity_by_order;
-	/**< The capacity of the \a parameters and \a raw_bits arrays
-	 * specified as an order, i.e. the number of array elements
-	 * allocated is 2 ^ \a capacity_by_order.
-	 */
+  uint32_t capacity_by_order;
+  /**< The capacity of the \a parameters and \a raw_bits arrays
+   * specified as an order, i.e. the number of array elements
+   * allocated is 2 ^ \a capacity_by_order.
+   */
 } FLAC__EntropyCodingMethod_PartitionedRiceContents;
 
-/** Header for a Rice partitioned residual.  (c.f. <A HREF="../format.html#partitioned_rice">format specification</A>)
+/** Header for a Rice partitioned residual.  (c.f. <A
+ * HREF="https://xiph.org/flac/format.html#partitioned_rice">format
+ * specification</A>)
  */
 typedef struct {
+  uint32_t order;
+  /**< The partition order, i.e. # of contexts = 2 ^ \a order. */
 
-	unsigned order;
-	/**< The partition order, i.e. # of contexts = 2 ^ \a order. */
-
-	const FLAC__EntropyCodingMethod_PartitionedRiceContents *contents;
-	/**< The context's Rice parameters and/or raw bits. */
+  const FLAC__EntropyCodingMethod_PartitionedRiceContents* contents;
+  /**< The context's Rice parameters and/or raw bits. */
 
 } FLAC__EntropyCodingMethod_PartitionedRice;
 
-extern FLAC_API const unsigned FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ORDER_LEN; /**< == 4 (bits) */
-extern FLAC_API const unsigned FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_PARAMETER_LEN; /**< == 4 (bits) */
-extern FLAC_API const unsigned FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_PARAMETER_LEN; /**< == 5 (bits) */
-extern FLAC_API const unsigned FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_RAW_LEN; /**< == 5 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ORDER_LEN; /**< == 4 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_PARAMETER_LEN; /**< == 4 (bits)
+                                                                 */
+extern FLAC_API const uint32_t
+    FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_PARAMETER_LEN; /**< == 5
+                                                                    (bits) */
+extern FLAC_API const uint32_t
+    FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_RAW_LEN; /**< == 5 (bits) */
 
-extern FLAC_API const unsigned FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ESCAPE_PARAMETER;
+extern FLAC_API const uint32_t
+    FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ESCAPE_PARAMETER;
 /**< == (1<<FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_PARAMETER_LEN)-1 */
-extern FLAC_API const unsigned FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_ESCAPE_PARAMETER;
+extern FLAC_API const uint32_t
+    FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_ESCAPE_PARAMETER;
 /**< == (1<<FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_PARAMETER_LEN)-1 */
 
-/** Header for the entropy coding method.  (c.f. <A HREF="../format.html#residual">format specification</A>)
+/** Header for the entropy coding method.  (c.f. <A
+ * HREF="https://xiph.org/flac/format.html#residual">format specification</A>)
  */
 typedef struct {
 	FLAC__EntropyCodingMethodType type;
@@ -259,7 +264,8 @@
 	} data;
 } FLAC__EntropyCodingMethod;
 
-extern FLAC_API const unsigned FLAC__ENTROPY_CODING_METHOD_TYPE_LEN; /**< == 2 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__ENTROPY_CODING_METHOD_TYPE_LEN; /**< == 2 (bits) */
 
 /*****************************************************************************/
 
@@ -278,68 +284,88 @@
  */
 extern FLAC_API const char * const FLAC__SubframeTypeString[];
 
-
-/** CONSTANT subframe.  (c.f. <A HREF="../format.html#subframe_constant">format specification</A>)
+/** CONSTANT subframe.  (c.f. <A
+ * HREF="https://xiph.org/flac/format.html#subframe_constant">format
+ * specification</A>)
  */
 typedef struct {
-	FLAC__int32 value; /**< The constant signal value. */
+        FLAC__int64 value; /**< The constant signal value. */
 } FLAC__Subframe_Constant;
 
+/** An enumeration of the possible verbatim subframe data types. */
+typedef enum {
+  FLAC__VERBATIM_SUBFRAME_DATA_TYPE_INT32, /**< verbatim subframe has 32-bit int
+                                            */
+  FLAC__VERBATIM_SUBFRAME_DATA_TYPE_INT64  /**< verbatim subframe has 64-bit int
+                                            */
+} FLAC__VerbatimSubframeDataType;
 
-/** VERBATIM subframe.  (c.f. <A HREF="../format.html#subframe_verbatim">format specification</A>)
+/** VERBATIM subframe.  (c.f. <A
+ * HREF="https://xiph.org/flac/format.html#subframe_verbatim">format
+ * specification</A>)
  */
 typedef struct {
-	const FLAC__int32 *data; /**< A pointer to verbatim signal. */
+        union {
+                const FLAC__int32*
+                    int32; /**< A FLAC__int32 pointer to verbatim signal. */
+                const FLAC__int64*
+                    int64; /**< A FLAC__int64 pointer to verbatim signal. */
+        } data;
+        FLAC__VerbatimSubframeDataType data_type;
 } FLAC__Subframe_Verbatim;
 
-
-/** FIXED subframe.  (c.f. <A HREF="../format.html#subframe_fixed">format specification</A>)
+/** FIXED subframe.  (c.f. <A
+ * HREF="https://xiph.org/flac/format.html#subframe_fixed">format
+ * specification</A>)
  */
 typedef struct {
 	FLAC__EntropyCodingMethod entropy_coding_method;
 	/**< The residual coding method. */
 
-	unsigned order;
-	/**< The polynomial order. */
+        uint32_t order;
+        /**< The polynomial order. */
 
-	FLAC__int32 warmup[FLAC__MAX_FIXED_ORDER];
-	/**< Warmup samples to prime the predictor, length == order. */
+        FLAC__int64 warmup[FLAC__MAX_FIXED_ORDER];
+        /**< Warmup samples to prime the predictor, length == order. */
 
-	const FLAC__int32 *residual;
+        const FLAC__int32 *residual;
 	/**< The residual signal, length == (blocksize minus order) samples. */
 } FLAC__Subframe_Fixed;
 
-
-/** LPC subframe.  (c.f. <A HREF="../format.html#subframe_lpc">format specification</A>)
+/** LPC subframe.  (c.f. <A
+ * HREF="https://xiph.org/flac/format.html#subframe_lpc">format
+ * specification</A>)
  */
 typedef struct {
 	FLAC__EntropyCodingMethod entropy_coding_method;
 	/**< The residual coding method. */
 
-	unsigned order;
-	/**< The FIR order. */
+        uint32_t order;
+        /**< The FIR order. */
 
-	unsigned qlp_coeff_precision;
-	/**< Quantized FIR filter coefficient precision in bits. */
+        uint32_t qlp_coeff_precision;
+        /**< Quantized FIR filter coefficient precision in bits. */
 
-	int quantization_level;
+        int quantization_level;
 	/**< The qlp coeff shift needed. */
 
 	FLAC__int32 qlp_coeff[FLAC__MAX_LPC_ORDER];
 	/**< FIR filter coefficients. */
 
-	FLAC__int32 warmup[FLAC__MAX_LPC_ORDER];
-	/**< Warmup samples to prime the predictor, length == order. */
+        FLAC__int64 warmup[FLAC__MAX_LPC_ORDER];
+        /**< Warmup samples to prime the predictor, length == order. */
 
-	const FLAC__int32 *residual;
+        const FLAC__int32 *residual;
 	/**< The residual signal, length == (blocksize minus order) samples. */
 } FLAC__Subframe_LPC;
 
-extern FLAC_API const unsigned FLAC__SUBFRAME_LPC_QLP_COEFF_PRECISION_LEN; /**< == 4 (bits) */
-extern FLAC_API const unsigned FLAC__SUBFRAME_LPC_QLP_SHIFT_LEN; /**< == 5 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__SUBFRAME_LPC_QLP_COEFF_PRECISION_LEN; /**< == 4 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__SUBFRAME_LPC_QLP_SHIFT_LEN; /**< == 5 (bits) */
 
-
-/** FLAC subframe structure.  (c.f. <A HREF="../format.html#subframe">format specification</A>)
+/** FLAC subframe structure.  (c.f. <A
+ * HREF="https://xiph.org/flac/format.html#subframe">format specification</A>)
  */
 typedef struct {
 	FLAC__SubframeType type;
@@ -349,7 +375,7 @@
 		FLAC__Subframe_LPC lpc;
 		FLAC__Subframe_Verbatim verbatim;
 	} data;
-	unsigned wasted_bits;
+        uint32_t wasted_bits;
 } FLAC__Subframe;
 
 /** == 1 (bit)
@@ -359,14 +385,19 @@
  * mandatory value of \c 0 but in the future may take on the value \c 0 or \c 1
  * to mean something else.
  */
-extern FLAC_API const unsigned FLAC__SUBFRAME_ZERO_PAD_LEN;
-extern FLAC_API const unsigned FLAC__SUBFRAME_TYPE_LEN; /**< == 6 (bits) */
-extern FLAC_API const unsigned FLAC__SUBFRAME_WASTED_BITS_FLAG_LEN; /**< == 1 (bit) */
+extern FLAC_API const uint32_t FLAC__SUBFRAME_ZERO_PAD_LEN;
+extern FLAC_API const uint32_t FLAC__SUBFRAME_TYPE_LEN; /**< == 6 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__SUBFRAME_WASTED_BITS_FLAG_LEN; /**< == 1 (bit) */
 
-extern FLAC_API const unsigned FLAC__SUBFRAME_TYPE_CONSTANT_BYTE_ALIGNED_MASK; /**< = 0x00 */
-extern FLAC_API const unsigned FLAC__SUBFRAME_TYPE_VERBATIM_BYTE_ALIGNED_MASK; /**< = 0x02 */
-extern FLAC_API const unsigned FLAC__SUBFRAME_TYPE_FIXED_BYTE_ALIGNED_MASK; /**< = 0x10 */
-extern FLAC_API const unsigned FLAC__SUBFRAME_TYPE_LPC_BYTE_ALIGNED_MASK; /**< = 0x40 */
+extern FLAC_API const uint32_t
+    FLAC__SUBFRAME_TYPE_CONSTANT_BYTE_ALIGNED_MASK; /**< = 0x00 */
+extern FLAC_API const uint32_t
+    FLAC__SUBFRAME_TYPE_VERBATIM_BYTE_ALIGNED_MASK; /**< = 0x02 */
+extern FLAC_API const uint32_t
+    FLAC__SUBFRAME_TYPE_FIXED_BYTE_ALIGNED_MASK; /**< = 0x10 */
+extern FLAC_API const uint32_t
+    FLAC__SUBFRAME_TYPE_LPC_BYTE_ALIGNED_MASK; /**< = 0x40 */
 
 /*****************************************************************************/
 
@@ -405,26 +436,27 @@
  */
 extern FLAC_API const char * const FLAC__FrameNumberTypeString[];
 
-
-/** FLAC frame header structure.  (c.f. <A HREF="../format.html#frame_header">format specification</A>)
+/** FLAC frame header structure.  (c.f. <A
+ * HREF="https://xiph.org/flac/format.html#frame_header">format
+ * specification</A>)
  */
 typedef struct {
-	unsigned blocksize;
-	/**< The number of samples per subframe. */
+        uint32_t blocksize;
+        /**< The number of samples per subframe. */
 
-	unsigned sample_rate;
-	/**< The sample rate in Hz. */
+        uint32_t sample_rate;
+        /**< The sample rate in Hz. */
 
-	unsigned channels;
-	/**< The number of channels (== number of subframes). */
+        uint32_t channels;
+        /**< The number of channels (== number of subframes). */
 
-	FLAC__ChannelAssignment channel_assignment;
+        FLAC__ChannelAssignment channel_assignment;
 	/**< The channel assignment for the frame. */
 
-	unsigned bits_per_sample;
-	/**< The sample resolution. */
+        uint32_t bits_per_sample;
+        /**< The sample resolution. */
 
-	FLAC__FrameNumberType number_type;
+        FLAC__FrameNumberType number_type;
 	/**< The numbering scheme used for the frame.  As a convenience, the
 	 * decoder will always convert a frame number to a sample number because
 	 * the rules are complex. */
@@ -443,19 +475,28 @@
 	 */
 } FLAC__FrameHeader;
 
-extern FLAC_API const unsigned FLAC__FRAME_HEADER_SYNC; /**< == 0x3ffe; the frame header sync code */
-extern FLAC_API const unsigned FLAC__FRAME_HEADER_SYNC_LEN; /**< == 14 (bits) */
-extern FLAC_API const unsigned FLAC__FRAME_HEADER_RESERVED_LEN; /**< == 1 (bits) */
-extern FLAC_API const unsigned FLAC__FRAME_HEADER_BLOCKING_STRATEGY_LEN; /**< == 1 (bits) */
-extern FLAC_API const unsigned FLAC__FRAME_HEADER_BLOCK_SIZE_LEN; /**< == 4 (bits) */
-extern FLAC_API const unsigned FLAC__FRAME_HEADER_SAMPLE_RATE_LEN; /**< == 4 (bits) */
-extern FLAC_API const unsigned FLAC__FRAME_HEADER_CHANNEL_ASSIGNMENT_LEN; /**< == 4 (bits) */
-extern FLAC_API const unsigned FLAC__FRAME_HEADER_BITS_PER_SAMPLE_LEN; /**< == 3 (bits) */
-extern FLAC_API const unsigned FLAC__FRAME_HEADER_ZERO_PAD_LEN; /**< == 1 (bit) */
-extern FLAC_API const unsigned FLAC__FRAME_HEADER_CRC_LEN; /**< == 8 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__FRAME_HEADER_SYNC; /**< == 0x3ffe; the frame header sync code */
+extern FLAC_API const uint32_t FLAC__FRAME_HEADER_SYNC_LEN; /**< == 14 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__FRAME_HEADER_RESERVED_LEN; /**< == 1 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__FRAME_HEADER_BLOCKING_STRATEGY_LEN; /**< == 1 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__FRAME_HEADER_BLOCK_SIZE_LEN; /**< == 4 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__FRAME_HEADER_SAMPLE_RATE_LEN; /**< == 4 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__FRAME_HEADER_CHANNEL_ASSIGNMENT_LEN; /**< == 4 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__FRAME_HEADER_BITS_PER_SAMPLE_LEN; /**< == 3 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__FRAME_HEADER_ZERO_PAD_LEN;                       /**< == 1 (bit) */
+extern FLAC_API const uint32_t FLAC__FRAME_HEADER_CRC_LEN; /**< == 8 (bits) */
 
-
-/** FLAC frame footer structure.  (c.f. <A HREF="../format.html#frame_footer">format specification</A>)
+/** FLAC frame footer structure.  (c.f. <A
+ * HREF="https://xiph.org/flac/format.html#frame_footer">format
+ * specification</A>)
  */
 typedef struct {
 	FLAC__uint16 crc;
@@ -465,10 +506,10 @@
 	 */
 } FLAC__FrameFooter;
 
-extern FLAC_API const unsigned FLAC__FRAME_FOOTER_CRC_LEN; /**< == 16 (bits) */
+extern FLAC_API const uint32_t FLAC__FRAME_FOOTER_CRC_LEN; /**< == 16 (bits) */
 
-
-/** FLAC frame structure.  (c.f. <A HREF="../format.html#frame">format specification</A>)
+/** FLAC frame structure.  (c.f. <A
+ * HREF="https://xiph.org/flac/format.html#frame">format specification</A>)
  */
 typedef struct {
 	FLAC__FrameHeader header;
@@ -488,32 +529,48 @@
 /** An enumeration of the available metadata block types. */
 typedef enum {
 
-	FLAC__METADATA_TYPE_STREAMINFO = 0,
-	/**< <A HREF="../format.html#metadata_block_streaminfo">STREAMINFO</A> block */
+  FLAC__METADATA_TYPE_STREAMINFO = 0,
+  /**< <A
+     HREF="https://xiph.org/flac/format.html#metadata_block_streaminfo">STREAMINFO</A>
+     block */
 
-	FLAC__METADATA_TYPE_PADDING = 1,
-	/**< <A HREF="../format.html#metadata_block_padding">PADDING</A> block */
+  FLAC__METADATA_TYPE_PADDING = 1,
+  /**< <A
+     HREF="https://xiph.org/flac/format.html#metadata_block_padding">PADDING</A>
+     block */
 
-	FLAC__METADATA_TYPE_APPLICATION = 2,
-	/**< <A HREF="../format.html#metadata_block_application">APPLICATION</A> block */
+  FLAC__METADATA_TYPE_APPLICATION = 2,
+  /**< <A
+     HREF="https://xiph.org/flac/format.html#metadata_block_application">APPLICATION</A>
+     block */
 
-	FLAC__METADATA_TYPE_SEEKTABLE = 3,
-	/**< <A HREF="../format.html#metadata_block_seektable">SEEKTABLE</A> block */
+  FLAC__METADATA_TYPE_SEEKTABLE = 3,
+  /**< <A
+     HREF="https://xiph.org/flac/format.html#metadata_block_seektable">SEEKTABLE</A>
+     block */
 
-	FLAC__METADATA_TYPE_VORBIS_COMMENT = 4,
-	/**< <A HREF="../format.html#metadata_block_vorbis_comment">VORBISCOMMENT</A> block (a.k.a. FLAC tags) */
+  FLAC__METADATA_TYPE_VORBIS_COMMENT = 4,
+  /**< <A
+     HREF="https://xiph.org/flac/format.html#metadata_block_vorbis_comment">VORBISCOMMENT</A>
+     block (a.k.a. FLAC tags) */
 
-	FLAC__METADATA_TYPE_CUESHEET = 5,
-	/**< <A HREF="../format.html#metadata_block_cuesheet">CUESHEET</A> block */
+  FLAC__METADATA_TYPE_CUESHEET = 5,
+  /**< <A
+     HREF="https://xiph.org/flac/format.html#metadata_block_cuesheet">CUESHEET</A>
+     block */
 
-	FLAC__METADATA_TYPE_PICTURE = 6,
-	/**< <A HREF="../format.html#metadata_block_picture">PICTURE</A> block */
+  FLAC__METADATA_TYPE_PICTURE = 6,
+  /**< <A
+     HREF="https://xiph.org/flac/format.html#metadata_block_picture">PICTURE</A>
+     block */
 
-	FLAC__METADATA_TYPE_UNDEFINED = 7,
-	/**< marker to denote beginning of undefined type range; this number will increase as new metadata types are added */
+  FLAC__METADATA_TYPE_UNDEFINED = 7,
+  /**< marker to denote beginning of undefined type range; this number will
+     increase as new metadata types are added */
 
-        FLAC__MAX_METADATA_TYPE = FLAC__MAX_METADATA_TYPE_CODE,
-        /**< No type will ever be greater than this. There is not enough room in the protocol block. */
+  FLAC__MAX_METADATA_TYPE = FLAC__MAX_METADATA_TYPE_CODE,
+  /**< No type will ever be greater than this. There is not enough room in the
+     protocol block. */
 } FLAC__MetadataType;
 
 /** Maps a FLAC__MetadataType to a C string.
@@ -523,33 +580,45 @@
  */
 extern FLAC_API const char * const FLAC__MetadataTypeString[];
 
-
-/** FLAC STREAMINFO structure.  (c.f. <A HREF="../format.html#metadata_block_streaminfo">format specification</A>)
+/** FLAC STREAMINFO structure.  (c.f. <A
+ * HREF="https://xiph.org/flac/format.html#metadata_block_streaminfo">format
+ * specification</A>)
  */
 typedef struct {
-	unsigned min_blocksize, max_blocksize;
-	unsigned min_framesize, max_framesize;
-	unsigned sample_rate;
-	unsigned channels;
-	unsigned bits_per_sample;
-	FLAC__uint64 total_samples;
-	FLAC__byte md5sum[16];
+        uint32_t min_blocksize, max_blocksize;
+        uint32_t min_framesize, max_framesize;
+        uint32_t sample_rate;
+        uint32_t channels;
+        uint32_t bits_per_sample;
+        FLAC__uint64 total_samples;
+        FLAC__byte md5sum[16];
 } FLAC__StreamMetadata_StreamInfo;
 
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_STREAMINFO_MIN_BLOCK_SIZE_LEN; /**< == 16 (bits) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_STREAMINFO_MAX_BLOCK_SIZE_LEN; /**< == 16 (bits) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_STREAMINFO_MIN_FRAME_SIZE_LEN; /**< == 24 (bits) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_STREAMINFO_MAX_FRAME_SIZE_LEN; /**< == 24 (bits) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_STREAMINFO_SAMPLE_RATE_LEN; /**< == 20 (bits) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_STREAMINFO_CHANNELS_LEN; /**< == 3 (bits) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_STREAMINFO_BITS_PER_SAMPLE_LEN; /**< == 5 (bits) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_STREAMINFO_TOTAL_SAMPLES_LEN; /**< == 36 (bits) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_STREAMINFO_MD5SUM_LEN; /**< == 128 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_STREAMINFO_MIN_BLOCK_SIZE_LEN; /**< == 16 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_STREAMINFO_MAX_BLOCK_SIZE_LEN; /**< == 16 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_STREAMINFO_MIN_FRAME_SIZE_LEN; /**< == 24 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_STREAMINFO_MAX_FRAME_SIZE_LEN; /**< == 24 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_STREAMINFO_SAMPLE_RATE_LEN; /**< == 20 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_STREAMINFO_CHANNELS_LEN; /**< == 3 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_STREAMINFO_BITS_PER_SAMPLE_LEN; /**< == 5 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_STREAMINFO_TOTAL_SAMPLES_LEN; /**< == 36 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_STREAMINFO_MD5SUM_LEN; /**< == 128 (bits) */
 
 /** The total stream length of the STREAMINFO block in bytes. */
 #define FLAC__STREAM_METADATA_STREAMINFO_LENGTH (34u)
 
-/** FLAC PADDING structure.  (c.f. <A HREF="../format.html#metadata_block_padding">format specification</A>)
+/** FLAC PADDING structure.  (c.f. <A
+ * HREF="https://xiph.org/flac/format.html#metadata_block_padding">format
+ * specification</A>)
  */
 typedef struct {
 	int dummy;
@@ -559,17 +628,20 @@
 	 */
 } FLAC__StreamMetadata_Padding;
 
-
-/** FLAC APPLICATION structure.  (c.f. <A HREF="../format.html#metadata_block_application">format specification</A>)
+/** FLAC APPLICATION structure.  (c.f. <A
+ * HREF="https://xiph.org/flac/format.html#metadata_block_application">format
+ * specification</A>)
  */
 typedef struct {
 	FLAC__byte id[4];
 	FLAC__byte *data;
 } FLAC__StreamMetadata_Application;
 
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_APPLICATION_ID_LEN; /**< == 32 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_APPLICATION_ID_LEN; /**< == 32 (bits) */
 
-/** SeekPoint structure used in SEEKTABLE blocks.  (c.f. <A HREF="../format.html#seekpoint">format specification</A>)
+/** SeekPoint structure used in SEEKTABLE blocks.  (c.f. <A
+ * HREF="https://xiph.org/flac/format.html#seekpoint">format specification</A>)
  */
 typedef struct {
 	FLAC__uint64 sample_number;
@@ -579,13 +651,16 @@
 	/**< The offset, in bytes, of the target frame with respect to
 	 * beginning of the first frame. */
 
-	unsigned frame_samples;
-	/**< The number of samples in the target frame. */
+        uint32_t frame_samples;
+        /**< The number of samples in the target frame. */
 } FLAC__StreamMetadata_SeekPoint;
 
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_SEEKPOINT_SAMPLE_NUMBER_LEN; /**< == 64 (bits) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_SEEKPOINT_STREAM_OFFSET_LEN; /**< == 64 (bits) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_SEEKPOINT_FRAME_SAMPLES_LEN; /**< == 16 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_SEEKPOINT_SAMPLE_NUMBER_LEN; /**< == 64 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_SEEKPOINT_STREAM_OFFSET_LEN; /**< == 64 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_SEEKPOINT_FRAME_SAMPLES_LEN; /**< == 16 (bits) */
 
 /** The total stream length of a seek point in bytes. */
 #define FLAC__STREAM_METADATA_SEEKPOINT_LENGTH (18u)
@@ -596,8 +671,9 @@
  */
 extern FLAC_API const FLAC__uint64 FLAC__STREAM_METADATA_SEEKPOINT_PLACEHOLDER;
 
-
-/** FLAC SEEKTABLE structure.  (c.f. <A HREF="../format.html#metadata_block_seektable">format specification</A>)
+/** FLAC SEEKTABLE structure.  (c.f. <A
+ * HREF="https://xiph.org/flac/format.html#metadata_block_seektable">format
+ * specification</A>)
  *
  * \note From the format specification:
  * - The seek points must be sorted by ascending sample number.
@@ -610,12 +686,13 @@
  *   present in a stream.
  */
 typedef struct {
-	unsigned num_points;
-	FLAC__StreamMetadata_SeekPoint *points;
+        uint32_t num_points;
+        FLAC__StreamMetadata_SeekPoint* points;
 } FLAC__StreamMetadata_SeekTable;
 
-
-/** Vorbis comment entry structure used in VORBIS_COMMENT blocks.  (c.f. <A HREF="../format.html#metadata_block_vorbis_comment">format specification</A>)
+/** Vorbis comment entry structure used in VORBIS_COMMENT blocks.  (c.f. <A
+ * HREF="https://xiph.org/flac/format.html#metadata_block_vorbis_comment">format
+ * specification</A>)
  *
  *  For convenience, the APIs maintain a trailing NUL character at the end of
  *  \a entry which is not counted toward \a length, i.e.
@@ -626,10 +703,12 @@
 	FLAC__byte *entry;
 } FLAC__StreamMetadata_VorbisComment_Entry;
 
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_VORBIS_COMMENT_ENTRY_LENGTH_LEN; /**< == 32 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_VORBIS_COMMENT_ENTRY_LENGTH_LEN; /**< == 32 (bits) */
 
-
-/** FLAC VORBIS_COMMENT structure.  (c.f. <A HREF="../format.html#metadata_block_vorbis_comment">format specification</A>)
+/** FLAC VORBIS_COMMENT structure.  (c.f. <A
+ * HREF="https://xiph.org/flac/format.html#metadata_block_vorbis_comment">format
+ * specification</A>)
  */
 typedef struct {
 	FLAC__StreamMetadata_VorbisComment_Entry vendor_string;
@@ -637,12 +716,12 @@
 	FLAC__StreamMetadata_VorbisComment_Entry *comments;
 } FLAC__StreamMetadata_VorbisComment;
 
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_VORBIS_COMMENT_NUM_COMMENTS_LEN; /**< == 32 (bits) */
-
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_VORBIS_COMMENT_NUM_COMMENTS_LEN; /**< == 32 (bits) */
 
 /** FLAC CUESHEET track index structure.  (See the
- * <A HREF="../format.html#cuesheet_track_index">format specification</A> for
- * the full description of each field.)
+ * <A HREF="https://xiph.org/flac/format.html#cuesheet_track_index">format
+ * specification</A> for the full description of each field.)
  */
 typedef struct {
 	FLAC__uint64 offset;
@@ -654,14 +733,16 @@
 	/**< The index point number. */
 } FLAC__StreamMetadata_CueSheet_Index;
 
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_INDEX_OFFSET_LEN; /**< == 64 (bits) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_INDEX_NUMBER_LEN; /**< == 8 (bits) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_INDEX_RESERVED_LEN; /**< == 3*8 (bits) */
-
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_CUESHEET_INDEX_OFFSET_LEN; /**< == 64 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_CUESHEET_INDEX_NUMBER_LEN; /**< == 8 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_CUESHEET_INDEX_RESERVED_LEN; /**< == 3*8 (bits) */
 
 /** FLAC CUESHEET track structure.  (See the
- * <A HREF="../format.html#cuesheet_track">format specification</A> for
- * the full description of each field.)
+ * <A HREF="https://xiph.org/flac/format.html#cuesheet_track">format
+ * specification</A> for the full description of each field.)
  */
 typedef struct {
 	FLAC__uint64 offset;
@@ -673,13 +754,14 @@
 	char isrc[13];
 	/**< Track ISRC.  This is a 12-digit alphanumeric code plus a trailing \c NUL byte */
 
-	unsigned type:1;
-	/**< The track type: 0 for audio, 1 for non-audio. */
+        uint32_t type : 1;
+        /**< The track type: 0 for audio, 1 for non-audio. */
 
-	unsigned pre_emphasis:1;
-	/**< The pre-emphasis flag: 0 for no pre-emphasis, 1 for pre-emphasis. */
+        uint32_t pre_emphasis : 1;
+        /**< The pre-emphasis flag: 0 for no pre-emphasis, 1 for pre-emphasis.
+         */
 
-	FLAC__byte num_indices;
+        FLAC__byte num_indices;
 	/**< The number of track index points. */
 
 	FLAC__StreamMetadata_CueSheet_Index *indices;
@@ -687,18 +769,24 @@
 
 } FLAC__StreamMetadata_CueSheet_Track;
 
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_TRACK_OFFSET_LEN; /**< == 64 (bits) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_TRACK_NUMBER_LEN; /**< == 8 (bits) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_TRACK_ISRC_LEN; /**< == 12*8 (bits) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_TRACK_TYPE_LEN; /**< == 1 (bit) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_TRACK_PRE_EMPHASIS_LEN; /**< == 1 (bit) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_TRACK_RESERVED_LEN; /**< == 6+13*8 (bits) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_TRACK_NUM_INDICES_LEN; /**< == 8 (bits) */
-
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_CUESHEET_TRACK_OFFSET_LEN; /**< == 64 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_CUESHEET_TRACK_NUMBER_LEN; /**< == 8 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_CUESHEET_TRACK_ISRC_LEN; /**< == 12*8 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_CUESHEET_TRACK_TYPE_LEN; /**< == 1 (bit) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_CUESHEET_TRACK_PRE_EMPHASIS_LEN; /**< == 1 (bit) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_CUESHEET_TRACK_RESERVED_LEN; /**< == 6+13*8 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_CUESHEET_TRACK_NUM_INDICES_LEN; /**< == 8 (bits) */
 
 /** FLAC CUESHEET structure.  (See the
- * <A HREF="../format.html#metadata_block_cuesheet">format specification</A>
- * for the full description of each field.)
+ * <A HREF="https://xiph.org/flac/format.html#metadata_block_cuesheet">format
+ * specification</A> for the full description of each field.)
  */
 typedef struct {
 	char media_catalog_number[129];
@@ -713,20 +801,25 @@
 	FLAC__bool is_cd;
 	/**< \c true if CUESHEET corresponds to a Compact Disc, else \c false. */
 
-	unsigned num_tracks;
-	/**< The number of tracks. */
+        uint32_t num_tracks;
+        /**< The number of tracks. */
 
-	FLAC__StreamMetadata_CueSheet_Track *tracks;
+        FLAC__StreamMetadata_CueSheet_Track *tracks;
 	/**< NULL if num_tracks == 0, else pointer to array of tracks. */
 
 } FLAC__StreamMetadata_CueSheet;
 
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_MEDIA_CATALOG_NUMBER_LEN; /**< == 128*8 (bits) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_LEAD_IN_LEN; /**< == 64 (bits) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_IS_CD_LEN; /**< == 1 (bit) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_RESERVED_LEN; /**< == 7+258*8 (bits) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_NUM_TRACKS_LEN; /**< == 8 (bits) */
-
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_CUESHEET_MEDIA_CATALOG_NUMBER_LEN; /**< == 128*8
+                                                                (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_CUESHEET_LEAD_IN_LEN; /**< == 64 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_CUESHEET_IS_CD_LEN; /**< == 1 (bit) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_CUESHEET_RESERVED_LEN; /**< == 7+258*8 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_CUESHEET_NUM_TRACKS_LEN; /**< == 8 (bits) */
 
 /** An enumeration of the PICTURE types (see FLAC__StreamMetadataPicture and id3 v2.4 APIC tag). */
 typedef enum {
@@ -763,8 +856,8 @@
 extern FLAC_API const char * const FLAC__StreamMetadata_Picture_TypeString[];
 
 /** FLAC PICTURE structure.  (See the
- * <A HREF="../format.html#metadata_block_picture">format specification</A>
- * for the full description of each field.)
+ * <A HREF="https://xiph.org/flac/format.html#metadata_block_picture">format
+ * specification</A> for the full description of each field.)
  */
 typedef struct {
 	FLAC__StreamMetadata_Picture_Type type;
@@ -810,15 +903,22 @@
 
 } FLAC__StreamMetadata_Picture;
 
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_TYPE_LEN; /**< == 32 (bits) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_MIME_TYPE_LENGTH_LEN; /**< == 32 (bits) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_DESCRIPTION_LENGTH_LEN; /**< == 32 (bits) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_WIDTH_LEN; /**< == 32 (bits) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_HEIGHT_LEN; /**< == 32 (bits) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_DEPTH_LEN; /**< == 32 (bits) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_COLORS_LEN; /**< == 32 (bits) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_DATA_LENGTH_LEN; /**< == 32 (bits) */
-
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_PICTURE_TYPE_LEN; /**< == 32 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_PICTURE_MIME_TYPE_LENGTH_LEN; /**< == 32 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_PICTURE_DESCRIPTION_LENGTH_LEN; /**< == 32 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_PICTURE_WIDTH_LEN; /**< == 32 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_PICTURE_HEIGHT_LEN; /**< == 32 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_PICTURE_DEPTH_LEN; /**< == 32 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_PICTURE_COLORS_LEN; /**< == 32 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_PICTURE_DATA_LENGTH_LEN; /**< == 32 (bits) */
 
 /** Structure that is used when a metadata block of unknown type is loaded.
  *  The contents are opaque.  The structure is used only internally to
@@ -828,22 +928,23 @@
 	FLAC__byte *data;
 } FLAC__StreamMetadata_Unknown;
 
-
-/** FLAC metadata block structure.  (c.f. <A HREF="../format.html#metadata_block">format specification</A>)
+/** FLAC metadata block structure.  (c.f. <A
+ * HREF="https://xiph.org/flac/format.html#metadata_block">format
+ * specification</A>)
  */
-typedef struct {
-	FLAC__MetadataType type;
-	/**< The type of the metadata block; used determine which member of the
+typedef struct FLAC__StreamMetadata {
+        FLAC__MetadataType type;
+        /**< The type of the metadata block; used to determine which member of the
 	 * \a data union to dereference.  If type >= FLAC__METADATA_TYPE_UNDEFINED
 	 * then \a data.unknown must be used. */
 
 	FLAC__bool is_last;
 	/**< \c true if this metadata block is the last, else \a false */
 
-	unsigned length;
-	/**< Length, in bytes, of the block data as it appears in the stream. */
+        uint32_t length;
+        /**< Length, in bytes, of the block data as it appears in the stream. */
 
-	union {
+        union {
 		FLAC__StreamMetadata_StreamInfo stream_info;
 		FLAC__StreamMetadata_Padding padding;
 		FLAC__StreamMetadata_Application application;
@@ -857,9 +958,12 @@
 	 * to use. */
 } FLAC__StreamMetadata;
 
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_IS_LAST_LEN; /**< == 1 (bit) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_TYPE_LEN; /**< == 7 (bits) */
-extern FLAC_API const unsigned FLAC__STREAM_METADATA_LENGTH_LEN; /**< == 24 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_IS_LAST_LEN; /**< == 1 (bit) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_TYPE_LEN; /**< == 7 (bits) */
+extern FLAC_API const uint32_t
+    FLAC__STREAM_METADATA_LENGTH_LEN; /**< == 24 (bits) */
 
 /** The total stream length of a metadata block header in bytes. */
 #define FLAC__STREAM_METADATA_HEADER_LENGTH (4u)
@@ -880,7 +984,7 @@
  *    \c true if the given sample rate conforms to the specification, else
  *    \c false.
  */
-FLAC_API FLAC__bool FLAC__format_sample_rate_is_valid(unsigned sample_rate);
+FLAC_API FLAC__bool FLAC__format_sample_rate_is_valid(uint32_t sample_rate);
 
 /** Tests that a blocksize at the given sample rate is valid for the FLAC
  *  subset.
@@ -892,7 +996,8 @@
  *    \c true if the given blocksize conforms to the specification for the
  *    subset at the given sample rate, else \c false.
  */
-FLAC_API FLAC__bool FLAC__format_blocksize_is_subset(unsigned blocksize, unsigned sample_rate);
+FLAC_API FLAC__bool FLAC__format_blocksize_is_subset(uint32_t blocksize,
+                                                     uint32_t sample_rate);
 
 /** Tests that a sample rate is valid for the FLAC subset.  The subset rules
  *  for valid sample rates are slightly more complex since the rate has to
@@ -903,7 +1008,7 @@
  *    \c true if the given sample rate conforms to the specification for the
  *    subset, else \c false.
  */
-FLAC_API FLAC__bool FLAC__format_sample_rate_is_subset(unsigned sample_rate);
+FLAC_API FLAC__bool FLAC__format_sample_rate_is_subset(uint32_t sample_rate);
 
 /** Check a Vorbis comment entry name to see if it conforms to the Vorbis
  *  comment specification.
@@ -926,14 +1031,16 @@
  *
  * \param value      A string to be checked.
  * \param length     A the length of \a value in bytes.  May be
- *                   \c (unsigned)(-1) to indicate that \a value is a plain
+ *                   \c (uint32_t)(-1) to indicate that \a value is a plain
  *                   UTF-8 NUL-terminated string.
  * \assert
  *    \code value != NULL \endcode
  * \retval FLAC__bool
  *    \c false if entry name is illegal, else \c true.
  */
-FLAC_API FLAC__bool FLAC__format_vorbiscomment_entry_value_is_legal(const FLAC__byte *value, unsigned length);
+FLAC_API FLAC__bool
+FLAC__format_vorbiscomment_entry_value_is_legal(const FLAC__byte* value,
+                                                uint32_t length);
 
 /** Check a Vorbis comment entry to see if it conforms to the Vorbis
  *  comment specification.
@@ -950,7 +1057,9 @@
  * \retval FLAC__bool
  *    \c false if entry name is illegal, else \c true.
  */
-FLAC_API FLAC__bool FLAC__format_vorbiscomment_entry_is_legal(const FLAC__byte *entry, unsigned length);
+FLAC_API FLAC__bool
+FLAC__format_vorbiscomment_entry_is_legal(const FLAC__byte* entry,
+                                          uint32_t length);
 
 /** Check a seek table to see if it conforms to the FLAC specification.
  *  See the format specification for limits on the contents of the
@@ -973,10 +1082,11 @@
  * \param seek_table  A pointer to a seek table to be sorted.
  * \assert
  *    \code seek_table != NULL \endcode
- * \retval unsigned
+ * \retval uint32_t
  *    The number of duplicate seek points converted into placeholders.
  */
-FLAC_API unsigned FLAC__format_seektable_sort(FLAC__StreamMetadata_SeekTable *seek_table);
+FLAC_API uint32_t
+FLAC__format_seektable_sort(FLAC__StreamMetadata_SeekTable* seek_table);
 
 /** Check a cue sheet to see if it conforms to the FLAC specification.
  *  See the format specification for limits on the contents of the

diff --git a/include/FLAC/metadata.h b/include/FLAC/metadata.h
index 8951532..1526cdc 100644
--- a/include/FLAC/metadata.h
+++ b/include/FLAC/metadata.h

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2001-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -93,7 +93,7 @@
  *  Efficient means the whole file is rewritten at most one time, and only
  *  when necessary.  Level 1 is not efficient only in the case that you
  *  cause more than one metadata block to grow or shrink beyond what can
- *  be accomodated by padding.  In this case you should probably use level
+ *  be accommodated by padding.  In this case you should probably use level
  *  2, which allows you to edit all the metadata for a file in memory and
  *  write it out all at once.
  *
@@ -125,7 +125,6 @@
 extern "C" {
 #endif
 
-
 /** \defgroup flac_metadata_level0 FLAC/metadata.h: metadata level 0 interface
  *  \ingroup flac_metadata
  *
@@ -134,6 +133,11 @@
  *  STREAMINFO, VORBIS_COMMENT, CUESHEET, and PICTURE blocks, requiring
  *  only a filename.
  *
+ *  On Windows, filename must be a UTF-8 encoded filename, which libFLAC
+ *  internally translates to an appropriate representation to use with
+ *  _wfopen. On all other systems, filename is passed to fopen without
+ *  any translation.
+ *
  *  They try to skip any ID3v2 tag at the head of the file.
  *
  * \{
@@ -217,13 +221,13 @@
  *                    matched exactly.  Use \c NULL to mean "any
  *                    description".
  * \param max_width   The maximum width in pixels desired.  Use
- *                    \c (unsigned)(-1) to mean "any width".
+ *                    \c (uint32_t)(-1) to mean "any width".
  * \param max_height  The maximum height in pixels desired.  Use
- *                    \c (unsigned)(-1) to mean "any height".
+ *                    \c (uint32_t)(-1) to mean "any height".
  * \param max_depth   The maximum color depth in bits-per-pixel desired.
- *                    Use \c (unsigned)(-1) to mean "any depth".
+ *                    Use \c (uint32_t)(-1) to mean "any depth".
  * \param max_colors  The maximum number of colors desired.  Use
- *                    \c (unsigned)(-1) to mean "any number of colors".
+ *                    \c (uint32_t)(-1) to mean "any number of colors".
  * \assert
  *    \code filename != NULL \endcode
  *    \code picture != NULL \endcode
@@ -234,7 +238,16 @@
  *    error, a file decoder error, or the file contained no PICTURE
  *    block, and \a *picture will be set to \c NULL.
  */
-FLAC_API FLAC__bool FLAC__metadata_get_picture(const char *filename, FLAC__StreamMetadata **picture, FLAC__StreamMetadata_Picture_Type type, const char *mime_type, const FLAC__byte *description, unsigned max_width, unsigned max_height, unsigned max_depth, unsigned max_colors);
+FLAC_API FLAC__bool
+FLAC__metadata_get_picture(const char* filename,
+                           FLAC__StreamMetadata** picture,
+                           FLAC__StreamMetadata_Picture_Type type,
+                           const char* mime_type,
+                           const FLAC__byte* description,
+                           uint32_t max_width,
+                           uint32_t max_height,
+                           uint32_t max_depth,
+                           uint32_t max_colors);
 
 /* \} */
 
@@ -387,6 +400,11 @@
 /** Initialize the iterator to point to the first metadata block in the
  *  given FLAC file.
  *
+ *  On Windows, filename must be a UTF-8 encoded filename, which libFLAC
+ *  internally translates to an appropriate representation to use with
+ *  _wfopen. On all other systems, filename is passed to fopen without
+ *  any translation.
+ *
  * \param iterator             A pointer to an existing iterator.
  * \param filename             The path to the FLAC file.
  * \param read_only            If \c true, the FLAC file will be opened
@@ -497,13 +515,16 @@
  *    \code iterator != NULL \endcode
  *    \a iterator has been successfully initialized with
  *    FLAC__metadata_simple_iterator_init()
- * \retval unsigned
+ * \retval uint32_t
  *    The length of the metadata block at the current iterator position.
  *    The is same length as that in the
- *    <a href="http://xiph.org/flac/format.html#metadata_block_header">metadata block header</a>,
- *    i.e. the length of the metadata body that follows the header.
+ *    <a
+ * href="https://xiph.org/flac/format.html#metadata_block_header">metadata
+ * block header</a>, i.e. the length of the metadata body that follows the
+ * header.
  */
-FLAC_API unsigned FLAC__metadata_simple_iterator_get_block_length(const FLAC__Metadata_SimpleIterator *iterator);
+FLAC_API uint32_t FLAC__metadata_simple_iterator_get_block_length(
+    const FLAC__Metadata_SimpleIterator* iterator);
 
 /** Get the application ID of the \c APPLICATION block at the current
  *  position.  This avoids reading the actual block data which can save
@@ -650,7 +671,6 @@
 
 /* \} */
 
-
 /** \defgroup flac_metadata_level2 FLAC/metadata.h: metadata level 2 interface
  *  \ingroup flac_metadata
  *
@@ -667,7 +687,7 @@
  *
  * - Create a new chain using FLAC__metadata_chain_new().  A chain is a
  *   linked list of FLAC metadata blocks.
- * - Read all metadata into the the chain from a FLAC file using
+ * - Read all metadata into the chain from a FLAC file using
  *   FLAC__metadata_chain_read() or FLAC__metadata_chain_read_ogg() and
  *   check the status.
  * - Optionally, consolidate the padding using
@@ -691,8 +711,9 @@
  * Even though the FLAC file is not open while the chain is being
  * manipulated, you must not alter the file externally during
  * this time.  The chain assumes the FLAC file will not change
- * between the time of FLAC__metadata_chain_read()/FLAC__metadata_chain_read_ogg()
- * and FLAC__metadata_chain_write().
+ * between the time of
+ * FLAC__metadata_chain_read()/FLAC__metadata_chain_read_ogg() and
+ * FLAC__metadata_chain_write().
  *
  * \note
  * Do not modify the is_last, length, or type fields of returned
@@ -719,66 +740,67 @@
 typedef struct FLAC__Metadata_Iterator FLAC__Metadata_Iterator;
 
 typedef enum {
-	FLAC__METADATA_CHAIN_STATUS_OK = 0,
-	/**< The chain is in the normal OK state */
+  FLAC__METADATA_CHAIN_STATUS_OK = 0,
+  /**< The chain is in the normal OK state */
 
-	FLAC__METADATA_CHAIN_STATUS_ILLEGAL_INPUT,
-	/**< The data passed into a function violated the function's usage criteria */
+  FLAC__METADATA_CHAIN_STATUS_ILLEGAL_INPUT,
+  /**< The data passed into a function violated the function's usage criteria */
 
-	FLAC__METADATA_CHAIN_STATUS_ERROR_OPENING_FILE,
-	/**< The chain could not open the target file */
+  FLAC__METADATA_CHAIN_STATUS_ERROR_OPENING_FILE,
+  /**< The chain could not open the target file */
 
-	FLAC__METADATA_CHAIN_STATUS_NOT_A_FLAC_FILE,
-	/**< The chain could not find the FLAC signature at the start of the file */
+  FLAC__METADATA_CHAIN_STATUS_NOT_A_FLAC_FILE,
+  /**< The chain could not find the FLAC signature at the start of the file */
 
-	FLAC__METADATA_CHAIN_STATUS_NOT_WRITABLE,
-	/**< The chain tried to write to a file that was not writable */
+  FLAC__METADATA_CHAIN_STATUS_NOT_WRITABLE,
+  /**< The chain tried to write to a file that was not writable */
 
-	FLAC__METADATA_CHAIN_STATUS_BAD_METADATA,
-	/**< The chain encountered input that does not conform to the FLAC metadata specification */
+  FLAC__METADATA_CHAIN_STATUS_BAD_METADATA,
+  /**< The chain encountered input that does not conform to the FLAC metadata
+     specification */
 
-	FLAC__METADATA_CHAIN_STATUS_READ_ERROR,
-	/**< The chain encountered an error while reading the FLAC file */
+  FLAC__METADATA_CHAIN_STATUS_READ_ERROR,
+  /**< The chain encountered an error while reading the FLAC file */
 
-	FLAC__METADATA_CHAIN_STATUS_SEEK_ERROR,
-	/**< The chain encountered an error while seeking in the FLAC file */
+  FLAC__METADATA_CHAIN_STATUS_SEEK_ERROR,
+  /**< The chain encountered an error while seeking in the FLAC file */
 
-	FLAC__METADATA_CHAIN_STATUS_WRITE_ERROR,
-	/**< The chain encountered an error while writing the FLAC file */
+  FLAC__METADATA_CHAIN_STATUS_WRITE_ERROR,
+  /**< The chain encountered an error while writing the FLAC file */
 
-	FLAC__METADATA_CHAIN_STATUS_RENAME_ERROR,
-	/**< The chain encountered an error renaming the FLAC file */
+  FLAC__METADATA_CHAIN_STATUS_RENAME_ERROR,
+  /**< The chain encountered an error renaming the FLAC file */
 
-	FLAC__METADATA_CHAIN_STATUS_UNLINK_ERROR,
-	/**< The chain encountered an error removing the temporary file */
+  FLAC__METADATA_CHAIN_STATUS_UNLINK_ERROR,
+  /**< The chain encountered an error removing the temporary file */
 
-	FLAC__METADATA_CHAIN_STATUS_MEMORY_ALLOCATION_ERROR,
-	/**< Memory allocation failed */
+  FLAC__METADATA_CHAIN_STATUS_MEMORY_ALLOCATION_ERROR,
+  /**< Memory allocation failed */
 
-	FLAC__METADATA_CHAIN_STATUS_INTERNAL_ERROR,
-	/**< The caller violated an assertion or an unexpected error occurred */
+  FLAC__METADATA_CHAIN_STATUS_INTERNAL_ERROR,
+  /**< The caller violated an assertion or an unexpected error occurred */
 
-	FLAC__METADATA_CHAIN_STATUS_INVALID_CALLBACKS,
-	/**< One or more of the required callbacks was NULL */
+  FLAC__METADATA_CHAIN_STATUS_INVALID_CALLBACKS,
+  /**< One or more of the required callbacks was NULL */
 
-	FLAC__METADATA_CHAIN_STATUS_READ_WRITE_MISMATCH,
-	/**< FLAC__metadata_chain_write() was called on a chain read by
-	 *   FLAC__metadata_chain_read_with_callbacks()/FLAC__metadata_chain_read_ogg_with_callbacks(),
-	 *   or 
-	 *   FLAC__metadata_chain_write_with_callbacks()/FLAC__metadata_chain_write_with_callbacks_and_tempfile()
-	 *   was called on a chain read by
-	 *   FLAC__metadata_chain_read()/FLAC__metadata_chain_read_ogg().
-	 *   Matching read/write methods must always be used. */
+  FLAC__METADATA_CHAIN_STATUS_READ_WRITE_MISMATCH,
+  /**< FLAC__metadata_chain_write() was called on a chain read by
+   *   FLAC__metadata_chain_read_with_callbacks()/FLAC__metadata_chain_read_ogg_with_callbacks(),
+   *   or
+   *   FLAC__metadata_chain_write_with_callbacks()/FLAC__metadata_chain_write_with_callbacks_and_tempfile()
+   *   was called on a chain read by
+   *   FLAC__metadata_chain_read()/FLAC__metadata_chain_read_ogg().
+   *   Matching read/write methods must always be used. */
 
-	FLAC__METADATA_CHAIN_STATUS_WRONG_WRITE_CALL
-	/**< FLAC__metadata_chain_write_with_callbacks() was called when the
-	 *   chain write requires a tempfile; use
-	 *   FLAC__metadata_chain_write_with_callbacks_and_tempfile() instead.
-	 *   Or, FLAC__metadata_chain_write_with_callbacks_and_tempfile() was
-	 *   called when the chain write does not require a tempfile; use
-	 *   FLAC__metadata_chain_write_with_callbacks() instead.
-	 *   Always check FLAC__metadata_chain_check_if_tempfile_needed()
-	 *   before writing via callbacks. */
+  FLAC__METADATA_CHAIN_STATUS_WRONG_WRITE_CALL
+  /**< FLAC__metadata_chain_write_with_callbacks() was called when the
+   *   chain write requires a tempfile; use
+   *   FLAC__metadata_chain_write_with_callbacks_and_tempfile() instead.
+   *   Or, FLAC__metadata_chain_write_with_callbacks_and_tempfile() was
+   *   called when the chain write does not require a tempfile; use
+   *   FLAC__metadata_chain_write_with_callbacks() instead.
+   *   Always check FLAC__metadata_chain_check_if_tempfile_needed()
+   *   before writing via callbacks. */
 
 } FLAC__Metadata_ChainStatus;
 
@@ -820,6 +842,11 @@
 
 /** Read all metadata from a FLAC file into the chain.
  *
+ *  On Windows, filename must be a UTF-8 encoded filename, which libFLAC
+ *  internally translates to an appropriate representation to use with
+ *  _wfopen. On all other systems, filename is passed to fopen without
+ *  any translation.
+ *
  * \param chain    A pointer to an existing chain.
  * \param filename The path to the FLAC file to read.
  * \assert
@@ -834,6 +861,11 @@
 
 /** Read all metadata from an Ogg FLAC file into the chain.
  *
+ *  On Windows, filename must be a UTF-8 encoded filename, which libFLAC
+ *  internally translates to an appropriate representation to use with
+ *  _wfopen. On all other systems, filename is passed to fopen without
+ *  any translation.
+ *
  * \note Ogg FLAC metadata data writing is not supported yet and
  * FLAC__metadata_chain_write() will fail.
  *
@@ -1373,24 +1405,31 @@
  * \retval FLAC__bool
  *    \c false if \a copy is \c true and malloc() fails, else \c true.
  */
-FLAC_API FLAC__bool FLAC__metadata_object_application_set_data(FLAC__StreamMetadata *object, FLAC__byte *data, unsigned length, FLAC__bool copy);
+FLAC_API FLAC__bool
+FLAC__metadata_object_application_set_data(FLAC__StreamMetadata* object,
+                                           FLAC__byte* data,
+                                           uint32_t length,
+                                           FLAC__bool copy);
 
 /** Resize the seekpoint array.
  *
  *  If the size shrinks, elements will truncated; if it grows, new placeholder
- *  points will be added to the end.
+ *  points will be added to the end. If this function returns false, the
+ *  object is left untouched.
  *
  * \param object          A pointer to an existing SEEKTABLE object.
  * \param new_num_points  The desired length of the array; may be \c 0.
  * \assert
  *    \code object != NULL \endcode
  *    \code object->type == FLAC__METADATA_TYPE_SEEKTABLE \endcode
- *    \code (object->data.seek_table.points == NULL && object->data.seek_table.num_points == 0) ||
- * (object->data.seek_table.points != NULL && object->data.seek_table.num_points > 0) \endcode
- * \retval FLAC__bool
- *    \c false if memory allocation error, else \c true.
+ *    \code (object->data.seek_table.points == NULL &&
+ * object->data.seek_table.num_points == 0) || (object->data.seek_table.points
+ * != NULL && object->data.seek_table.num_points > 0) \endcode
+ * \retval FLAC__bool \c false if memory allocation error, else \c true.
  */
-FLAC_API FLAC__bool FLAC__metadata_object_seektable_resize_points(FLAC__StreamMetadata *object, unsigned new_num_points);
+FLAC_API FLAC__bool
+FLAC__metadata_object_seektable_resize_points(FLAC__StreamMetadata* object,
+                                              uint32_t new_num_points);
 
 /** Set a seekpoint in a seektable.
  *
@@ -1402,7 +1441,10 @@
  *    \code object->type == FLAC__METADATA_TYPE_SEEKTABLE \endcode
  *    \code object->data.seek_table.num_points > point_num \endcode
  */
-FLAC_API void FLAC__metadata_object_seektable_set_point(FLAC__StreamMetadata *object, unsigned point_num, FLAC__StreamMetadata_SeekPoint point);
+FLAC_API void FLAC__metadata_object_seektable_set_point(
+    FLAC__StreamMetadata* object,
+    uint32_t point_num,
+    FLAC__StreamMetadata_SeekPoint point);
 
 /** Insert a seekpoint into a seektable.
  *
@@ -1416,7 +1458,10 @@
  * \retval FLAC__bool
  *    \c false if memory allocation error, else \c true.
  */
-FLAC_API FLAC__bool FLAC__metadata_object_seektable_insert_point(FLAC__StreamMetadata *object, unsigned point_num, FLAC__StreamMetadata_SeekPoint point);
+FLAC_API FLAC__bool FLAC__metadata_object_seektable_insert_point(
+    FLAC__StreamMetadata* object,
+    uint32_t point_num,
+    FLAC__StreamMetadata_SeekPoint point);
 
 /** Delete a seekpoint from a seektable.
  *
@@ -1429,7 +1474,9 @@
  * \retval FLAC__bool
  *    \c false if memory allocation error, else \c true.
  */
-FLAC_API FLAC__bool FLAC__metadata_object_seektable_delete_point(FLAC__StreamMetadata *object, unsigned point_num);
+FLAC_API FLAC__bool
+FLAC__metadata_object_seektable_delete_point(FLAC__StreamMetadata* object,
+                                             uint32_t point_num);
 
 /** Check a seektable to see if it conforms to the FLAC specification.
  *  See the format specification for limits on the contents of the
@@ -1459,7 +1506,10 @@
  * \retval FLAC__bool
  *    \c false if memory allocation fails, else \c true.
  */
-FLAC_API FLAC__bool FLAC__metadata_object_seektable_template_append_placeholders(FLAC__StreamMetadata *object, unsigned num);
+FLAC_API FLAC__bool
+FLAC__metadata_object_seektable_template_append_placeholders(
+    FLAC__StreamMetadata* object,
+    uint32_t num);
 
 /** Append a specific seek point template to the end of a seek table.
  *
@@ -1494,7 +1544,10 @@
  * \retval FLAC__bool
  *    \c false if memory allocation fails, else \c true.
  */
-FLAC_API FLAC__bool FLAC__metadata_object_seektable_template_append_points(FLAC__StreamMetadata *object, FLAC__uint64 sample_numbers[], unsigned num);
+FLAC_API FLAC__bool FLAC__metadata_object_seektable_template_append_points(
+    FLAC__StreamMetadata* object,
+    FLAC__uint64 sample_numbers[],
+    uint32_t num);
 
 /** Append a set of evenly-spaced seek point templates to the end of a
  *  seek table.
@@ -1516,7 +1569,11 @@
  * \retval FLAC__bool
  *    \c false if memory allocation fails, else \c true.
  */
-FLAC_API FLAC__bool FLAC__metadata_object_seektable_template_append_spaced_points(FLAC__StreamMetadata *object, unsigned num, FLAC__uint64 total_samples);
+FLAC_API FLAC__bool
+FLAC__metadata_object_seektable_template_append_spaced_points(
+    FLAC__StreamMetadata* object,
+    uint32_t num,
+    FLAC__uint64 total_samples);
 
 /** Append a set of evenly-spaced seek point templates to the end of a
  *  seek table.
@@ -1544,7 +1601,11 @@
  * \retval FLAC__bool
  *    \c false if memory allocation fails, else \c true.
  */
-FLAC_API FLAC__bool FLAC__metadata_object_seektable_template_append_spaced_points_by_samples(FLAC__StreamMetadata *object, unsigned samples, FLAC__uint64 total_samples);
+FLAC_API FLAC__bool
+FLAC__metadata_object_seektable_template_append_spaced_points_by_samples(
+    FLAC__StreamMetadata* object,
+    uint32_t samples,
+    FLAC__uint64 total_samples);
 
 /** Sort a seek table's seek points according to the format specification,
  *  removing duplicates.
@@ -1591,19 +1652,23 @@
 /** Resize the comment array.
  *
  *  If the size shrinks, elements will truncated; if it grows, new empty
- *  fields will be added to the end.
+ *  fields will be added to the end.  If this function returns false, the
+ *  object is left untouched.
  *
  * \param object            A pointer to an existing VORBIS_COMMENT object.
  * \param new_num_comments  The desired length of the array; may be \c 0.
  * \assert
  *    \code object != NULL \endcode
  *    \code object->type == FLAC__METADATA_TYPE_VORBIS_COMMENT \endcode
- *    \code (object->data.vorbis_comment.comments == NULL && object->data.vorbis_comment.num_comments == 0) ||
- * (object->data.vorbis_comment.comments != NULL && object->data.vorbis_comment.num_comments > 0) \endcode
- * \retval FLAC__bool
- *    \c false if memory allocation fails, else \c true.
+ *    \code (object->data.vorbis_comment.comments == NULL &&
+ * object->data.vorbis_comment.num_comments == 0) ||
+ * (object->data.vorbis_comment.comments != NULL &&
+ * object->data.vorbis_comment.num_comments > 0) \endcode
+ * \retval FLAC__bool \c false if memory allocation fails, else \c true.
  */
-FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_resize_comments(FLAC__StreamMetadata *object, unsigned new_num_comments);
+FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_resize_comments(
+    FLAC__StreamMetadata* object,
+    uint32_t new_num_comments);
 
 /** Sets a comment in a VORBIS_COMMENT block.
  *
@@ -1630,7 +1695,11 @@
  *    \c false if memory allocation fails or \a entry does not comply with the
  *    Vorbis comment specification, else \c true.
  */
-FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_set_comment(FLAC__StreamMetadata *object, unsigned comment_num, FLAC__StreamMetadata_VorbisComment_Entry entry, FLAC__bool copy);
+FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_set_comment(
+    FLAC__StreamMetadata* object,
+    uint32_t comment_num,
+    FLAC__StreamMetadata_VorbisComment_Entry entry,
+    FLAC__bool copy);
 
 /** Insert a comment in a VORBIS_COMMENT block at the given index.
  *
@@ -1660,7 +1729,11 @@
  *    \c false if memory allocation fails or \a entry does not comply with the
  *    Vorbis comment specification, else \c true.
  */
-FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_insert_comment(FLAC__StreamMetadata *object, unsigned comment_num, FLAC__StreamMetadata_VorbisComment_Entry entry, FLAC__bool copy);
+FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_insert_comment(
+    FLAC__StreamMetadata* object,
+    uint32_t comment_num,
+    FLAC__StreamMetadata_VorbisComment_Entry entry,
+    FLAC__bool copy);
 
 /** Appends a comment to a VORBIS_COMMENT block.
  *
@@ -1692,7 +1765,7 @@
  *  For convenience, a trailing NUL is added to the entry if it doesn't have
  *  one already.
  *
- *  Depending on the the value of \a all, either all or just the first comment
+ *  Depending on the value of \a all, either all or just the first comment
  *  whose field name(s) match the given entry's name will be replaced by the
  *  given entry.  If no comments match, \a entry will simply be appended.
  *
@@ -1733,7 +1806,9 @@
  * \retval FLAC__bool
  *    \c false if realloc() fails, else \c true.
  */
-FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_delete_comment(FLAC__StreamMetadata *object, unsigned comment_num);
+FLAC_API FLAC__bool
+FLAC__metadata_object_vorbiscomment_delete_comment(FLAC__StreamMetadata* object,
+                                                   uint32_t comment_num);
 
 /** Creates a Vorbis comment entry from NUL-terminated name and value strings.
  *
@@ -1789,7 +1864,10 @@
  * \retval FLAC__bool
  *    \c true if the field names match, else \c false
  */
-FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_entry_matches(const FLAC__StreamMetadata_VorbisComment_Entry entry, const char *field_name, unsigned field_name_length);
+FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_entry_matches(
+    const FLAC__StreamMetadata_VorbisComment_Entry entry,
+    const char* field_name,
+    uint32_t field_name_length);
 
 /** Find a Vorbis comment with the given field name.
  *
@@ -1808,7 +1886,10 @@
  *    The offset in the comment array of the first comment whose field
  *    name matches \a field_name, or \c -1 if no match was found.
  */
-FLAC_API int FLAC__metadata_object_vorbiscomment_find_entry_from(const FLAC__StreamMetadata *object, unsigned offset, const char *field_name);
+FLAC_API int FLAC__metadata_object_vorbiscomment_find_entry_from(
+    const FLAC__StreamMetadata* object,
+    uint32_t offset,
+    const char* field_name);
 
 /** Remove first Vorbis comment matching the given field name.
  *
@@ -1871,7 +1952,8 @@
 /** Resize a track's index point array.
  *
  *  If the size shrinks, elements will truncated; if it grows, new blank
- *  indices will be added to the end.
+ *  indices will be added to the end. If this function returns false, the
+ *  track object is left untouched.
  *
  * \param object           A pointer to an existing CUESHEET object.
  * \param track_num        The index of the track to modify.  NOTE: this is not
@@ -1881,12 +1963,16 @@
  *    \code object != NULL \endcode
  *    \code object->type == FLAC__METADATA_TYPE_CUESHEET \endcode
  *    \code object->data.cue_sheet.num_tracks > track_num \endcode
- *    \code (object->data.cue_sheet.tracks[track_num].indices == NULL && object->data.cue_sheet.tracks[track_num].num_indices == 0) ||
- * (object->data.cue_sheet.tracks[track_num].indices != NULL && object->data.cue_sheet.tracks[track_num].num_indices > 0) \endcode
- * \retval FLAC__bool
- *    \c false if memory allocation error, else \c true.
+ *    \code (object->data.cue_sheet.tracks[track_num].indices == NULL &&
+ * object->data.cue_sheet.tracks[track_num].num_indices == 0) ||
+ * (object->data.cue_sheet.tracks[track_num].indices != NULL &&
+ * object->data.cue_sheet.tracks[track_num].num_indices > 0) \endcode
+ * \retval FLAC__bool \c false if memory allocation error, else \c true.
  */
-FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_track_resize_indices(FLAC__StreamMetadata *object, unsigned track_num, unsigned new_num_indices);
+FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_track_resize_indices(
+    FLAC__StreamMetadata* object,
+    uint32_t track_num,
+    uint32_t new_num_indices);
 
 /** Insert an index point in a CUESHEET track at the given index.
  *
@@ -1909,7 +1995,11 @@
  * \retval FLAC__bool
  *    \c false if realloc() fails, else \c true.
  */
-FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_track_insert_index(FLAC__StreamMetadata *object, unsigned track_num, unsigned index_num, FLAC__StreamMetadata_CueSheet_Index index);
+FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_track_insert_index(
+    FLAC__StreamMetadata* object,
+    uint32_t track_num,
+    uint32_t index_num,
+    FLAC__StreamMetadata_CueSheet_Index index);
 
 /** Insert a blank index point in a CUESHEET track at the given index.
  *
@@ -1933,7 +2023,10 @@
  * \retval FLAC__bool
  *    \c false if realloc() fails, else \c true.
  */
-FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_track_insert_blank_index(FLAC__StreamMetadata *object, unsigned track_num, unsigned index_num);
+FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_track_insert_blank_index(
+    FLAC__StreamMetadata* object,
+    uint32_t track_num,
+    uint32_t index_num);
 
 /** Delete an index point in a CUESHEET track at the given index.
  *
@@ -1952,24 +2045,30 @@
  * \retval FLAC__bool
  *    \c false if realloc() fails, else \c true.
  */
-FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_track_delete_index(FLAC__StreamMetadata *object, unsigned track_num, unsigned index_num);
+FLAC_API FLAC__bool
+FLAC__metadata_object_cuesheet_track_delete_index(FLAC__StreamMetadata* object,
+                                                  uint32_t track_num,
+                                                  uint32_t index_num);
 
 /** Resize the track array.
  *
  *  If the size shrinks, elements will truncated; if it grows, new blank
- *  tracks will be added to the end.
+ *  tracks will be added to the end.  If this function returns false, the
+ *  object is left untouched.
  *
  * \param object            A pointer to an existing CUESHEET object.
  * \param new_num_tracks    The desired length of the array; may be \c 0.
  * \assert
  *    \code object != NULL \endcode
  *    \code object->type == FLAC__METADATA_TYPE_CUESHEET \endcode
- *    \code (object->data.cue_sheet.tracks == NULL && object->data.cue_sheet.num_tracks == 0) ||
- * (object->data.cue_sheet.tracks != NULL && object->data.cue_sheet.num_tracks > 0) \endcode
- * \retval FLAC__bool
- *    \c false if memory allocation error, else \c true.
+ *    \code (object->data.cue_sheet.tracks == NULL &&
+ * object->data.cue_sheet.num_tracks == 0) || (object->data.cue_sheet.tracks !=
+ * NULL && object->data.cue_sheet.num_tracks > 0) \endcode
+ * \retval FLAC__bool \c false if memory allocation error, else \c true.
  */
-FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_resize_tracks(FLAC__StreamMetadata *object, unsigned new_num_tracks);
+FLAC_API FLAC__bool
+FLAC__metadata_object_cuesheet_resize_tracks(FLAC__StreamMetadata* object,
+                                             uint32_t new_num_tracks);
 
 /** Sets a track in a CUESHEET block.
  *
@@ -1991,7 +2090,11 @@
  * \retval FLAC__bool
  *    \c false if \a copy is \c true and malloc() fails, else \c true.
  */
-FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_set_track(FLAC__StreamMetadata *object, unsigned track_num, FLAC__StreamMetadata_CueSheet_Track *track, FLAC__bool copy);
+FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_set_track(
+    FLAC__StreamMetadata* object,
+    uint32_t track_num,
+    FLAC__StreamMetadata_CueSheet_Track* track,
+    FLAC__bool copy);
 
 /** Insert a track in a CUESHEET block at the given index.
  *
@@ -2014,7 +2117,11 @@
  * \retval FLAC__bool
  *    \c false if \a copy is \c true and malloc() fails, else \c true.
  */
-FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_insert_track(FLAC__StreamMetadata *object, unsigned track_num, FLAC__StreamMetadata_CueSheet_Track *track, FLAC__bool copy);
+FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_insert_track(
+    FLAC__StreamMetadata* object,
+    uint32_t track_num,
+    FLAC__StreamMetadata_CueSheet_Track* track,
+    FLAC__bool copy);
 
 /** Insert a blank track in a CUESHEET block at the given index.
  *
@@ -2033,7 +2140,9 @@
  * \retval FLAC__bool
  *    \c false if \a copy is \c true and malloc() fails, else \c true.
  */
-FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_insert_blank_track(FLAC__StreamMetadata *object, unsigned track_num);
+FLAC_API FLAC__bool
+FLAC__metadata_object_cuesheet_insert_blank_track(FLAC__StreamMetadata* object,
+                                                  uint32_t track_num);
 
 /** Delete a track in a CUESHEET block at the given index.
  *
@@ -2048,7 +2157,9 @@
  * \retval FLAC__bool
  *    \c false if realloc() fails, else \c true.
  */
-FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_delete_track(FLAC__StreamMetadata *object, unsigned track_num);
+FLAC_API FLAC__bool
+FLAC__metadata_object_cuesheet_delete_track(FLAC__StreamMetadata* object,
+                                            uint32_t track_num);
 
 /** Check a cue sheet to see if it conforms to the FLAC specification.
  *  See the format specification for limits on the contents of the

diff --git a/include/FLAC/ordinals.h b/include/FLAC/ordinals.h
index b1e1acf..77757d6 100644
--- a/include/FLAC/ordinals.h
+++ b/include/FLAC/ordinals.h

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -39,12 +39,11 @@
  * the 1999 ISO C Standard header file <stdint.h>.
  */
 
-typedef __int8 FLAC__int8;
+typedef signed __int8 FLAC__int8;
+typedef signed __int16 FLAC__int16;
+typedef signed __int32 FLAC__int32;
+typedef signed __int64 FLAC__int64;
 typedef unsigned __int8 FLAC__uint8;
-
-typedef __int16 FLAC__int16;
-typedef __int32 FLAC__int32;
-typedef __int64 FLAC__int64;
 typedef unsigned __int16 FLAC__uint16;
 typedef unsigned __int32 FLAC__uint32;
 typedef unsigned __int64 FLAC__uint64;

diff --git a/include/FLAC/stream_decoder.h b/include/FLAC/stream_decoder.h
index 9bfdd1f..30cabb0 100644
--- a/include/FLAC/stream_decoder.h
+++ b/include/FLAC/stream_decoder.h

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -201,45 +201,45 @@
  */
 typedef enum {
 
-	FLAC__STREAM_DECODER_SEARCH_FOR_METADATA = 0,
-	/**< The decoder is ready to search for metadata. */
+  FLAC__STREAM_DECODER_SEARCH_FOR_METADATA = 0,
+  /**< The decoder is ready to search for metadata. */
 
-	FLAC__STREAM_DECODER_READ_METADATA,
-	/**< The decoder is ready to or is in the process of reading metadata. */
+  FLAC__STREAM_DECODER_READ_METADATA,
+  /**< The decoder is ready to or is in the process of reading metadata. */
 
-	FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC,
-	/**< The decoder is ready to or is in the process of searching for the
-	 * frame sync code.
-	 */
+  FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC,
+  /**< The decoder is ready to or is in the process of searching for the
+   * frame sync code.
+   */
 
-	FLAC__STREAM_DECODER_READ_FRAME,
-	/**< The decoder is ready to or is in the process of reading a frame. */
+  FLAC__STREAM_DECODER_READ_FRAME,
+  /**< The decoder is ready to or is in the process of reading a frame. */
 
-	FLAC__STREAM_DECODER_END_OF_STREAM,
-	/**< The decoder has reached the end of the stream. */
+  FLAC__STREAM_DECODER_END_OF_STREAM,
+  /**< The decoder has reached the end of the stream. */
 
-	FLAC__STREAM_DECODER_OGG_ERROR,
-	/**< An error occurred in the underlying Ogg layer.  */
+  FLAC__STREAM_DECODER_OGG_ERROR,
+  /**< An error occurred in the underlying Ogg layer.  */
 
-	FLAC__STREAM_DECODER_SEEK_ERROR,
-	/**< An error occurred while seeking.  The decoder must be flushed
-	 * with FLAC__stream_decoder_flush() or reset with
-	 * FLAC__stream_decoder_reset() before decoding can continue.
-	 */
+  FLAC__STREAM_DECODER_SEEK_ERROR,
+  /**< An error occurred while seeking.  The decoder must be flushed
+   * with FLAC__stream_decoder_flush() or reset with
+   * FLAC__stream_decoder_reset() before decoding can continue.
+   */
 
-	FLAC__STREAM_DECODER_ABORTED,
-	/**< The decoder was aborted by the read callback. */
+  FLAC__STREAM_DECODER_ABORTED,
+  /**< The decoder was aborted by the read or write callback. */
 
-	FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR,
-	/**< An error occurred allocating memory.  The decoder is in an invalid
-	 * state and can no longer be used.
-	 */
+  FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR,
+  /**< An error occurred allocating memory.  The decoder is in an invalid
+   * state and can no longer be used.
+   */
 
-	FLAC__STREAM_DECODER_UNINITIALIZED
-	/**< The decoder is in the uninitialized state; one of the
-	 * FLAC__stream_decoder_init_*() functions must be called before samples
-	 * can be processed.
-	 */
+  FLAC__STREAM_DECODER_UNINITIALIZED
+  /**< The decoder is in the uninitialized state; one of the
+   * FLAC__stream_decoder_init_*() functions must be called before samples
+   * can be processed.
+   */
 
 } FLAC__StreamDecoderState;
 
@@ -408,7 +408,6 @@
  */
 extern FLAC_API const char * const FLAC__StreamDecoderWriteStatusString[];
 
-
 /** Possible values passed back to the FLAC__StreamDecoder error callback.
  *  \c FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC is the generic catch-
  *  all.  The rest could be caused by bad sync (false synchronization on
@@ -422,21 +421,28 @@
  *  could be because the decoder encountered a valid frame made by a future
  *  version of the encoder which it cannot parse, or because of a false
  *  sync making it appear as though an encountered frame was generated by
- *  a future encoder.
+ *  a future encoder. \c FLAC__STREAM_DECODER_ERROR_STATUS_BAD_METADATA is
+ *  caused by finding data that doesn't fit a metadata block (too large
+ *  or too small) or finding inconsistencies in the metadata, for example
+ *  a PICTURE block with an image that exceeds the size of the metadata
+ *  block.
  */
 typedef enum {
 
-	FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC,
-	/**< An error in the stream caused the decoder to lose synchronization. */
+  FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC,
+  /**< An error in the stream caused the decoder to lose synchronization. */
 
-	FLAC__STREAM_DECODER_ERROR_STATUS_BAD_HEADER,
-	/**< The decoder encountered a corrupted frame header. */
+  FLAC__STREAM_DECODER_ERROR_STATUS_BAD_HEADER,
+  /**< The decoder encountered a corrupted frame header. */
 
-	FLAC__STREAM_DECODER_ERROR_STATUS_FRAME_CRC_MISMATCH,
-	/**< The frame's data did not match the CRC in the footer. */
+  FLAC__STREAM_DECODER_ERROR_STATUS_FRAME_CRC_MISMATCH,
+  /**< The frame's data did not match the CRC in the footer. */
 
-	FLAC__STREAM_DECODER_ERROR_STATUS_UNPARSEABLE_STREAM
-	/**< The decoder encountered reserved fields in use in the stream. */
+  FLAC__STREAM_DECODER_ERROR_STATUS_UNPARSEABLE_STREAM,
+  /**< The decoder encountered reserved fields in use in the stream. */
+
+  FLAC__STREAM_DECODER_ERROR_STATUS_BAD_METADATA
+  /**< The decoder encountered a corrupted metadata block. */
 
 } FLAC__StreamDecoderErrorStatus;
 
@@ -674,7 +680,8 @@
  *                  samples of length \a frame->header.blocksize.
  *                  Channels will be ordered according to the FLAC
  *                  specification; see the documentation for the
- *                  <A HREF="../format.html#frame_header">frame header</A>.
+ *                  <A
+ * HREF="https://xiph.org/flac/format.html#frame_header">frame header</A>.
  * \param  client_data  The callee's client data set through
  *                      FLAC__stream_decoder_init_*().
  * \retval FLAC__StreamDecoderWriteStatus
@@ -920,7 +927,7 @@
  * \param  decoder  A decoder instance to query.
  * \assert
  *    \code decoder != NULL \endcode
- * \retval unsigned
+ * \retval FLAC__uint64
  *    See above.
  */
 FLAC_API FLAC__uint64 FLAC__stream_decoder_get_total_samples(const FLAC__StreamDecoder *decoder);
@@ -932,10 +939,11 @@
  * \param  decoder  A decoder instance to query.
  * \assert
  *    \code decoder != NULL \endcode
- * \retval unsigned
+ * \retval uint32_t
  *    See above.
  */
-FLAC_API unsigned FLAC__stream_decoder_get_channels(const FLAC__StreamDecoder *decoder);
+FLAC_API uint32_t
+FLAC__stream_decoder_get_channels(const FLAC__StreamDecoder* decoder);
 
 /** Get the current channel assignment in the stream being decoded.
  *  Will only be valid after decoding has started and will contain the
@@ -956,10 +964,11 @@
  * \param  decoder  A decoder instance to query.
  * \assert
  *    \code decoder != NULL \endcode
- * \retval unsigned
+ * \retval uint32_t
  *    See above.
  */
-FLAC_API unsigned FLAC__stream_decoder_get_bits_per_sample(const FLAC__StreamDecoder *decoder);
+FLAC_API uint32_t
+FLAC__stream_decoder_get_bits_per_sample(const FLAC__StreamDecoder* decoder);
 
 /** Get the current sample rate in Hz of the stream being decoded.
  *  Will only be valid after decoding has started and will contain the
@@ -968,10 +977,11 @@
  * \param  decoder  A decoder instance to query.
  * \assert
  *    \code decoder != NULL \endcode
- * \retval unsigned
+ * \retval uint32_t
  *    See above.
  */
-FLAC_API unsigned FLAC__stream_decoder_get_sample_rate(const FLAC__StreamDecoder *decoder);
+FLAC_API uint32_t
+FLAC__stream_decoder_get_sample_rate(const FLAC__StreamDecoder* decoder);
 
 /** Get the current blocksize of the stream being decoded.
  *  Will only be valid after decoding has started and will contain the
@@ -980,10 +990,11 @@
  * \param  decoder  A decoder instance to query.
  * \assert
  *    \code decoder != NULL \endcode
- * \retval unsigned
+ * \retval uint32_t
  *    See above.
  */
-FLAC_API unsigned FLAC__stream_decoder_get_blocksize(const FLAC__StreamDecoder *decoder);
+FLAC_API uint32_t
+FLAC__stream_decoder_get_blocksize(const FLAC__StreamDecoder* decoder);
 
 /** Returns the decoder's current read position within the stream.
  *  The position is the byte offset from the start of the stream.
@@ -1006,6 +1017,17 @@
  */
 FLAC_API FLAC__bool FLAC__stream_decoder_get_decode_position(const FLAC__StreamDecoder *decoder, FLAC__uint64 *position);
 
+/** Return client_data from decoder.
+ *  The data pointed to by the pointer should not be modified.
+ *
+ * \param  decoder  A decoder instance.
+ * \retval const void *
+ *    The callee's client data set through FLAC__stream_decoder_init_*().
+ *    Do not modify the contents.
+ */
+FLAC_API const void* FLAC__stream_decoder_get_client_data(
+    FLAC__StreamDecoder* decoder);
+
 /** Initialize the decoder instance to decode native FLAC streams.
  *
  *  This flavor of initialization sets up the decoder to decode from a
@@ -1184,7 +1206,7 @@
  *                            Unless \a file is \c stdin, it will be closed
  *                            when FLAC__stream_decoder_finish() is called.
  *                            Note however that seeking will not work when
- *                            decoding from \c stdout since it is not seekable.
+ *                            decoding from \c stdin since it is not seekable.
  * \param  write_callback     See FLAC__StreamDecoderWriteCallback.  This
  *                            pointer must not be \c NULL.
  * \param  metadata_callback  See FLAC__StreamDecoderMetadataCallback.  This
@@ -1234,7 +1256,7 @@
  *                            Unless \a file is \c stdin, it will be closed
  *                            when FLAC__stream_decoder_finish() is called.
  *                            Note however that seeking will not work when
- *                            decoding from \c stdout since it is not seekable.
+ *                            decoding from \c stdin since it is not seekable.
  * \param  write_callback     See FLAC__StreamDecoderWriteCallback.  This
  *                            pointer must not be \c NULL.
  * \param  metadata_callback  See FLAC__StreamDecoderMetadataCallback.  This
@@ -1263,11 +1285,15 @@
 /** Initialize the decoder instance to decode native FLAC files.
  *
  *  This flavor of initialization sets up the decoder to decode from a plain
- *  native FLAC file.  If POSIX fopen() semantics are not sufficient, (for
- *  example, with Unicode filenames on Windows), you must use
- *  FLAC__stream_decoder_init_FILE(), or FLAC__stream_decoder_init_stream()
+ *  native FLAC file.  If POSIX fopen() semantics are not sufficient, you must
+ *  use FLAC__stream_decoder_init_FILE(), or FLAC__stream_decoder_init_stream()
  *  and provide callbacks for the I/O.
  *
+ *  On Windows, filename must be a UTF-8 encoded filename, which libFLAC
+ *  internally translates to an appropriate representation to use with
+ *  _wfopen. On all other systems, filename is passed to fopen without
+ *  any translation.
+ *
  *  This function should be called after FLAC__stream_decoder_new() and
  *  FLAC__stream_decoder_set_*() but before any of the
  *  FLAC__stream_decoder_process_*() functions.  Will set and return the
@@ -1275,23 +1301,18 @@
  *  if initialization succeeded.
  *
  * \param  decoder            An uninitialized decoder instance.
- * \param  filename           The name of the file to decode from.  The file will
- *                            be opened with fopen().  Use \c NULL to decode from
- *                            \c stdin.  Note that \c stdin is not seekable.
- * \param  write_callback     See FLAC__StreamDecoderWriteCallback.  This
- *                            pointer must not be \c NULL.
- * \param  metadata_callback  See FLAC__StreamDecoderMetadataCallback.  This
- *                            pointer may be \c NULL if the callback is not
- *                            desired.
- * \param  error_callback     See FLAC__StreamDecoderErrorCallback.  This
- *                            pointer must not be \c NULL.
- * \param  client_data        This value will be supplied to callbacks in their
- *                            \a client_data argument.
- * \assert
- *    \code decoder != NULL \endcode
- * \retval FLAC__StreamDecoderInitStatus
- *    \c FLAC__STREAM_DECODER_INIT_STATUS_OK if initialization was successful;
- *    see FLAC__StreamDecoderInitStatus for the meanings of other return values.
+ * \param  filename           The name of the file to decode from.  The file
+ * will be opened with fopen().  Use \c NULL to decode from \c stdin.  Note that
+ * \c stdin is not seekable. \param  write_callback     See
+ * FLAC__StreamDecoderWriteCallback.  This pointer must not be \c NULL. \param
+ * metadata_callback  See FLAC__StreamDecoderMetadataCallback.  This pointer may
+ * be \c NULL if the callback is not desired. \param  error_callback     See
+ * FLAC__StreamDecoderErrorCallback.  This pointer must not be \c NULL. \param
+ * client_data        This value will be supplied to callbacks in their \a
+ * client_data argument. \assert \code decoder != NULL \endcode \retval
+ * FLAC__StreamDecoderInitStatus \c FLAC__STREAM_DECODER_INIT_STATUS_OK if
+ * initialization was successful; see FLAC__StreamDecoderInitStatus for the
+ * meanings of other return values.
  */
 FLAC_API FLAC__StreamDecoderInitStatus FLAC__stream_decoder_init_file(
 	FLAC__StreamDecoder *decoder,
@@ -1305,10 +1326,14 @@
 /** Initialize the decoder instance to decode Ogg FLAC files.
  *
  *  This flavor of initialization sets up the decoder to decode from a plain
- *  Ogg FLAC file.  If POSIX fopen() semantics are not sufficient, (for
- *  example, with Unicode filenames on Windows), you must use
- *  FLAC__stream_decoder_init_ogg_FILE(), or FLAC__stream_decoder_init_ogg_stream()
- *  and provide callbacks for the I/O.
+ *  Ogg FLAC file.  If POSIX fopen() semantics are not sufficient, you must use
+ *  FLAC__stream_decoder_init_ogg_FILE(), or
+ * FLAC__stream_decoder_init_ogg_stream() and provide callbacks for the I/O.
+ *
+ *  On Windows, filename must be a UTF-8 encoded filename, which libFLAC
+ *  internally translates to an appropriate representation to use with
+ *  _wfopen. On all other systems, filename is passed to fopen without
+ *  any translation.
  *
  *  This function should be called after FLAC__stream_decoder_new() and
  *  FLAC__stream_decoder_set_*() but before any of the
@@ -1321,23 +1346,18 @@
  *  will return \c FLAC__STREAM_DECODER_INIT_STATUS_UNSUPPORTED_CONTAINER.
  *
  * \param  decoder            An uninitialized decoder instance.
- * \param  filename           The name of the file to decode from.  The file will
- *                            be opened with fopen().  Use \c NULL to decode from
- *                            \c stdin.  Note that \c stdin is not seekable.
- * \param  write_callback     See FLAC__StreamDecoderWriteCallback.  This
- *                            pointer must not be \c NULL.
- * \param  metadata_callback  See FLAC__StreamDecoderMetadataCallback.  This
- *                            pointer may be \c NULL if the callback is not
- *                            desired.
- * \param  error_callback     See FLAC__StreamDecoderErrorCallback.  This
- *                            pointer must not be \c NULL.
- * \param  client_data        This value will be supplied to callbacks in their
- *                            \a client_data argument.
- * \assert
- *    \code decoder != NULL \endcode
- * \retval FLAC__StreamDecoderInitStatus
- *    \c FLAC__STREAM_DECODER_INIT_STATUS_OK if initialization was successful;
- *    see FLAC__StreamDecoderInitStatus for the meanings of other return values.
+ * \param  filename           The name of the file to decode from.  The file
+ * will be opened with fopen().  Use \c NULL to decode from \c stdin.  Note that
+ * \c stdin is not seekable. \param  write_callback     See
+ * FLAC__StreamDecoderWriteCallback.  This pointer must not be \c NULL. \param
+ * metadata_callback  See FLAC__StreamDecoderMetadataCallback.  This pointer may
+ * be \c NULL if the callback is not desired. \param  error_callback     See
+ * FLAC__StreamDecoderErrorCallback.  This pointer must not be \c NULL. \param
+ * client_data        This value will be supplied to callbacks in their \a
+ * client_data argument. \assert \code decoder != NULL \endcode \retval
+ * FLAC__StreamDecoderInitStatus \c FLAC__STREAM_DECODER_INIT_STATUS_OK if
+ * initialization was successful; see FLAC__StreamDecoderInitStatus for the
+ * meanings of other return values.
  */
 FLAC_API FLAC__StreamDecoderInitStatus FLAC__stream_decoder_init_ogg_file(
 	FLAC__StreamDecoder *decoder,
@@ -1403,8 +1423,7 @@
  *  and is not seekable (i.e. no seek callback was provided or the seek
  *  callback returns \c FLAC__STREAM_DECODER_SEEK_STATUS_UNSUPPORTED), it
  *  is the duty of the client to start feeding data from the beginning of
- *  the stream on the next FLAC__stream_decoder_process() or
- *  FLAC__stream_decoder_process_interleaved() call.
+ *  the stream on the next FLAC__stream_decoder_process_*() call.
  *
  * \param  decoder  A decoder instance.
  * \assert

diff --git a/include/FLAC/stream_encoder.h b/include/FLAC/stream_encoder.h
index efc213a..1079b5c 100644
--- a/include/FLAC/stream_encoder.h
+++ b/include/FLAC/stream_encoder.h

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -72,8 +72,8 @@
  * \link flac_stream_encoder stream encoder \endlink module.
  */
 
-/** \defgroup flac_stream_encoder FLAC/stream_encoder.h: stream encoder interface
- *  \ingroup flac_encoder
+/** \defgroup flac_stream_encoder FLAC/stream_encoder.h: stream encoder interface
+ *  \ingroup flac_encoder
  *
  *  \brief
  *  This module contains the functions which implement the stream
@@ -105,8 +105,9 @@
  *   prepare for encoding using
  *   - FLAC__stream_encoder_init_stream() or FLAC__stream_encoder_init_FILE()
  *     or FLAC__stream_encoder_init_file() for native FLAC
- *   - FLAC__stream_encoder_init_ogg_stream() or FLAC__stream_encoder_init_ogg_FILE()
- *     or FLAC__stream_encoder_init_ogg_file() for Ogg FLAC
+ *   - FLAC__stream_encoder_init_ogg_stream() or
+ *     FLAC__stream_encoder_init_ogg_FILE() or
+ *     FLAC__stream_encoder_init_ogg_file() for Ogg FLAC
  * - The program calls FLAC__stream_encoder_process() or
  *   FLAC__stream_encoder_process_interleaved() to encode data, which
  *   subsequently calls the callbacks when there is encoder data ready
@@ -129,15 +130,15 @@
  * Unlike the decoders, the stream encoder has many options that can
  * affect the speed and compression ratio.  When setting these parameters
  * you should have some basic knowledge of the format (see the
- * <A HREF="../documentation_format_overview.html">user-level documentation</A>
- * or the <A HREF="../format.html">formal description</A>).  The
- * FLAC__stream_encoder_set_*() functions themselves do not validate the
- * values as many are interdependent.  The FLAC__stream_encoder_init_*()
- * functions will do this, so make sure to pay attention to the state
- * returned by FLAC__stream_encoder_init_*() to make sure that it is
- * FLAC__STREAM_ENCODER_INIT_STATUS_OK.  Any parameters that are not set
- * before FLAC__stream_encoder_init_*() will take on the defaults from
- * the constructor.
+ * <A HREF="https://xiph.org/flac/documentation_format_overview.html">user-level
+ * documentation</A> or the <A HREF="https://xiph.org/flac/format.html">formal
+ * description</A>).  The FLAC__stream_encoder_set_*() functions themselves do
+ * not validate the values as many are interdependent.  The
+ * FLAC__stream_encoder_init_*() functions will do this, so make sure to pay
+ * attention to the state returned by FLAC__stream_encoder_init_*() to make sure
+ * that it is FLAC__STREAM_ENCODER_INIT_STATUS_OK.  Any parameters that are not
+ * set before FLAC__stream_encoder_init_*() will take on the defaults from the
+ * constructor.
  *
  * There are three initialization functions for native FLAC, one for
  * setting up the encoder to encode FLAC data to the client via
@@ -229,7 +230,6 @@
  * \{
  */
 
-
 /** State values for a FLAC__StreamEncoder.
  *
  * The encoder's state can be obtained by calling FLAC__stream_encoder_get_state().
@@ -292,61 +292,69 @@
  */
 typedef enum {
 
-	FLAC__STREAM_ENCODER_INIT_STATUS_OK = 0,
-	/**< Initialization was successful. */
+  FLAC__STREAM_ENCODER_INIT_STATUS_OK = 0,
+  /**< Initialization was successful. */
 
-	FLAC__STREAM_ENCODER_INIT_STATUS_ENCODER_ERROR,
-	/**< General failure to set up encoder; call FLAC__stream_encoder_get_state() for cause. */
+  FLAC__STREAM_ENCODER_INIT_STATUS_ENCODER_ERROR,
+  /**< General failure to set up encoder; call FLAC__stream_encoder_get_state()
+     for cause. */
 
-	FLAC__STREAM_ENCODER_INIT_STATUS_UNSUPPORTED_CONTAINER,
-	/**< The library was not compiled with support for the given container
-	 * format.
-	 */
+  FLAC__STREAM_ENCODER_INIT_STATUS_UNSUPPORTED_CONTAINER,
+  /**< The library was not compiled with support for the given container
+   * format.
+   */
 
-	FLAC__STREAM_ENCODER_INIT_STATUS_INVALID_CALLBACKS,
-	/**< A required callback was not supplied. */
+  FLAC__STREAM_ENCODER_INIT_STATUS_INVALID_CALLBACKS,
+  /**< A required callback was not supplied. */
 
-	FLAC__STREAM_ENCODER_INIT_STATUS_INVALID_NUMBER_OF_CHANNELS,
-	/**< The encoder has an invalid setting for number of channels. */
+  FLAC__STREAM_ENCODER_INIT_STATUS_INVALID_NUMBER_OF_CHANNELS,
+  /**< The encoder has an invalid setting for number of channels. */
 
-	FLAC__STREAM_ENCODER_INIT_STATUS_INVALID_BITS_PER_SAMPLE,
-	/**< The encoder has an invalid setting for bits-per-sample.
-	 * FLAC supports 4-32 bps but the reference encoder currently supports
-	 * only up to 24 bps.
-	 */
+  FLAC__STREAM_ENCODER_INIT_STATUS_INVALID_BITS_PER_SAMPLE,
+  /**< The encoder has an invalid setting for bits-per-sample.
+   * FLAC supports 4-32 bps.
+   */
 
-	FLAC__STREAM_ENCODER_INIT_STATUS_INVALID_SAMPLE_RATE,
-	/**< The encoder has an invalid setting for the input sample rate. */
+  FLAC__STREAM_ENCODER_INIT_STATUS_INVALID_SAMPLE_RATE,
+  /**< The encoder has an invalid setting for the input sample rate. */
 
-	FLAC__STREAM_ENCODER_INIT_STATUS_INVALID_BLOCK_SIZE,
-	/**< The encoder has an invalid setting for the block size. */
+  FLAC__STREAM_ENCODER_INIT_STATUS_INVALID_BLOCK_SIZE,
+  /**< The encoder has an invalid setting for the block size. */
 
-	FLAC__STREAM_ENCODER_INIT_STATUS_INVALID_MAX_LPC_ORDER,
-	/**< The encoder has an invalid setting for the maximum LPC order. */
+  FLAC__STREAM_ENCODER_INIT_STATUS_INVALID_MAX_LPC_ORDER,
+  /**< The encoder has an invalid setting for the maximum LPC order. */
 
-	FLAC__STREAM_ENCODER_INIT_STATUS_INVALID_QLP_COEFF_PRECISION,
-	/**< The encoder has an invalid setting for the precision of the quantized linear predictor coefficients. */
+  FLAC__STREAM_ENCODER_INIT_STATUS_INVALID_QLP_COEFF_PRECISION,
+  /**< The encoder has an invalid setting for the precision of the quantized
+     linear predictor coefficients. */
 
-	FLAC__STREAM_ENCODER_INIT_STATUS_BLOCK_SIZE_TOO_SMALL_FOR_LPC_ORDER,
-	/**< The specified block size is less than the maximum LPC order. */
+  FLAC__STREAM_ENCODER_INIT_STATUS_BLOCK_SIZE_TOO_SMALL_FOR_LPC_ORDER,
+  /**< The specified block size is less than the maximum LPC order. */
 
-	FLAC__STREAM_ENCODER_INIT_STATUS_NOT_STREAMABLE,
-	/**< The encoder is bound to the <A HREF="../format.html#subset">Subset</A> but other settings violate it. */
+  FLAC__STREAM_ENCODER_INIT_STATUS_NOT_STREAMABLE,
+  /**< The encoder is bound to the <A
+     HREF="https://xiph.org/flac/format.html#subset">Subset</A> but other
+     settings violate it. */
 
-	FLAC__STREAM_ENCODER_INIT_STATUS_INVALID_METADATA,
-	/**< The metadata input to the encoder is invalid, in one of the following ways:
-	 * - FLAC__stream_encoder_set_metadata() was called with a null pointer but a block count > 0
-	 * - One of the metadata blocks contains an undefined type
-	 * - It contains an illegal CUESHEET as checked by FLAC__format_cuesheet_is_legal()
-	 * - It contains an illegal SEEKTABLE as checked by FLAC__format_seektable_is_legal()
-	 * - It contains more than one SEEKTABLE block or more than one VORBIS_COMMENT block
-	 */
+  FLAC__STREAM_ENCODER_INIT_STATUS_INVALID_METADATA,
+  /**< The metadata input to the encoder is invalid, in one of the following
+   * ways:
+   * - FLAC__stream_encoder_set_metadata() was called with a null pointer but a
+   *   block count > 0
+   * - One of the metadata blocks contains an undefined type
+   * - It contains an illegal CUESHEET as checked by
+   *   FLAC__format_cuesheet_is_legal()
+   * - It contains an illegal SEEKTABLE as checked by
+   *   FLAC__format_seektable_is_legal()
+   * - It contains more than one SEEKTABLE block or more than one VORBIS_COMMENT
+   *   block
+   */
 
-	FLAC__STREAM_ENCODER_INIT_STATUS_ALREADY_INITIALIZED
-	/**< FLAC__stream_encoder_init_*() was called when the encoder was
-	 * already initialized, usually because
-	 * FLAC__stream_encoder_finish() was not called.
-	 */
+  FLAC__STREAM_ENCODER_INIT_STATUS_ALREADY_INITIALIZED
+  /**< FLAC__stream_encoder_init_*() was called when the encoder was
+   * already initialized, usually because
+   * FLAC__stream_encoder_finish() was not called.
+   */
 
 } FLAC__StreamEncoderInitStatus;
 
@@ -554,7 +562,13 @@
  * \retval FLAC__StreamEncoderWriteStatus
  *    The callee's return status.
  */
-typedef FLAC__StreamEncoderWriteStatus (*FLAC__StreamEncoderWriteCallback)(const FLAC__StreamEncoder *encoder, const FLAC__byte buffer[], size_t bytes, unsigned samples, unsigned current_frame, void *client_data);
+typedef FLAC__StreamEncoderWriteStatus (*FLAC__StreamEncoderWriteCallback)(
+    const FLAC__StreamEncoder* encoder,
+    const FLAC__byte buffer[],
+    size_t bytes,
+    uint32_t samples,
+    uint32_t current_frame,
+    void* client_data);
 
 /** Signature for the seek callback.
  *
@@ -675,8 +689,13 @@
  * \param  client_data      The callee's client data set through
  *                          FLAC__stream_encoder_init_*().
  */
-typedef void (*FLAC__StreamEncoderProgressCallback)(const FLAC__StreamEncoder *encoder, FLAC__uint64 bytes_written, FLAC__uint64 samples_written, unsigned frames_written, unsigned total_frames_estimate, void *client_data);
-
+typedef void (*FLAC__StreamEncoderProgressCallback)(
+    const FLAC__StreamEncoder* encoder,
+    FLAC__uint64 bytes_written,
+    FLAC__uint64 samples_written,
+    uint32_t frames_written,
+    uint32_t total_frames_estimate,
+    void* client_data);
 
 /***********************************************************************
  *
@@ -743,8 +762,8 @@
  */
 FLAC_API FLAC__bool FLAC__stream_encoder_set_verify(FLAC__StreamEncoder *encoder, FLAC__bool value);
 
-/** Set the <A HREF="../format.html#subset">Subset</A> flag.  If \c true,
- *  the encoder will comply with the Subset and will check the
+/** Set the <A HREF="https://xiph.org/flac/format.html#subset">Subset</A> flag.
+ * If \c true, the encoder will comply with the Subset and will check the
  *  settings during FLAC__stream_encoder_init_*() to see if all settings
  *  comply.  If \c false, the settings may take advantage of the full
  *  range that the format allows.
@@ -771,7 +790,8 @@
  * \retval FLAC__bool
  *    \c false if the encoder is already initialized, else \c true.
  */
-FLAC_API FLAC__bool FLAC__stream_encoder_set_channels(FLAC__StreamEncoder *encoder, unsigned value);
+FLAC_API FLAC__bool
+FLAC__stream_encoder_set_channels(FLAC__StreamEncoder* encoder, uint32_t value);
 
 /** Set the sample resolution of the input to be encoded.
  *
@@ -787,7 +807,9 @@
  * \retval FLAC__bool
  *    \c false if the encoder is already initialized, else \c true.
  */
-FLAC_API FLAC__bool FLAC__stream_encoder_set_bits_per_sample(FLAC__StreamEncoder *encoder, unsigned value);
+FLAC_API FLAC__bool
+FLAC__stream_encoder_set_bits_per_sample(FLAC__StreamEncoder* encoder,
+                                         uint32_t value);
 
 /** Set the sample rate (in Hz) of the input to be encoded.
  *
@@ -799,7 +821,9 @@
  * \retval FLAC__bool
  *    \c false if the encoder is already initialized, else \c true.
  */
-FLAC_API FLAC__bool FLAC__stream_encoder_set_sample_rate(FLAC__StreamEncoder *encoder, unsigned value);
+FLAC_API FLAC__bool
+FLAC__stream_encoder_set_sample_rate(FLAC__StreamEncoder* encoder,
+                                     uint32_t value);
 
 /** Set the compression level
  *
@@ -843,15 +867,30 @@
  *  <td>max residual partition order</td>
  *  <td>rice parameter search dist</td>
  * </tr>
- * <tr>  <td><b>0</b></td> <td>false</td> <td>false</td> <td>tukey(0.5)<td>                                     <td>0</td>  <td>0</td> <td>false</td> <td>false</td> <td>false</td> <td>0</td> <td>3</td> <td>0</td> </tr>
- * <tr>  <td><b>1</b></td> <td>true</td>  <td>true</td>  <td>tukey(0.5)<td>                                     <td>0</td>  <td>0</td> <td>false</td> <td>false</td> <td>false</td> <td>0</td> <td>3</td> <td>0</td> </tr>
- * <tr>  <td><b>2</b></td> <td>true</td>  <td>false</td> <td>tukey(0.5)<td>                                     <td>0</td>  <td>0</td> <td>false</td> <td>false</td> <td>false</td> <td>0</td> <td>3</td> <td>0</td> </tr>
- * <tr>  <td><b>3</b></td> <td>false</td> <td>false</td> <td>tukey(0.5)<td>                                     <td>6</td>  <td>0</td> <td>false</td> <td>false</td> <td>false</td> <td>0</td> <td>4</td> <td>0</td> </tr>
- * <tr>  <td><b>4</b></td> <td>true</td>  <td>true</td>  <td>tukey(0.5)<td>                                     <td>8</td>  <td>0</td> <td>false</td> <td>false</td> <td>false</td> <td>0</td> <td>4</td> <td>0</td> </tr>
- * <tr>  <td><b>5</b></td> <td>true</td>  <td>false</td> <td>tukey(0.5)<td>                                     <td>8</td>  <td>0</td> <td>false</td> <td>false</td> <td>false</td> <td>0</td> <td>5</td> <td>0</td> </tr>
- * <tr>  <td><b>6</b></td> <td>true</td>  <td>false</td> <td>tukey(0.5);partial_tukey(2)<td>                    <td>8</td>  <td>0</td> <td>false</td> <td>false</td> <td>false</td> <td>0</td> <td>6</td> <td>0</td> </tr>
- * <tr>  <td><b>7</b></td> <td>true</td>  <td>false</td> <td>tukey(0.5);partial_tukey(2)<td>                    <td>12</td> <td>0</td> <td>false</td> <td>false</td> <td>false</td> <td>0</td> <td>6</td> <td>0</td> </tr>
- * <tr>  <td><b>8</b></td> <td>true</td>  <td>false</td> <td>tukey(0.5);partial_tukey(2);punchout_tukey(3)</td> <td>12</td> <td>0</td> <td>false</td> <td>false</td> <td>false</td> <td>0</td> <td>6</td> <td>0</td> </tr>
+ * <tr>  <td><b>0</b></td> <td>false</td> <td>false</td> <td>tukey(0.5)</td>
+ * <td>0</td>  <td>0</td> <td>false</td> <td>false</td> <td>false</td>
+ * <td>0</td> <td>3</td> <td>0</td> </tr> <tr>  <td><b>1</b></td> <td>true</td>
+ * <td>true</td>  <td>tukey(0.5)</td>         <td>0</td>  <td>0</td>
+ * <td>false</td> <td>false</td> <td>false</td> <td>0</td> <td>3</td> <td>0</td>
+ * </tr> <tr>  <td><b>2</b></td> <td>true</td>  <td>false</td>
+ * <td>tukey(0.5)</td>         <td>0</td>  <td>0</td> <td>false</td>
+ * <td>false</td> <td>false</td> <td>0</td> <td>3</td> <td>0</td> </tr> <tr>
+ * <td><b>3</b></td> <td>false</td> <td>false</td> <td>tukey(0.5)</td>
+ * <td>6</td>  <td>0</td> <td>false</td> <td>false</td> <td>false</td>
+ * <td>0</td> <td>4</td> <td>0</td> </tr> <tr>  <td><b>4</b></td> <td>true</td>
+ * <td>true</td>  <td>tukey(0.5)</td>         <td>8</td>  <td>0</td>
+ * <td>false</td> <td>false</td> <td>false</td> <td>0</td> <td>4</td> <td>0</td>
+ * </tr> <tr>  <td><b>5</b></td> <td>true</td>  <td>false</td>
+ * <td>tukey(0.5)</td>         <td>8</td>  <td>0</td> <td>false</td>
+ * <td>false</td> <td>false</td> <td>0</td> <td>5</td> <td>0</td> </tr> <tr>
+ * <td><b>6</b></td> <td>true</td>  <td>false</td> <td>subdivide_tukey(2)</td>
+ * <td>8</td>  <td>0</td> <td>false</td> <td>false</td> <td>false</td>
+ * <td>0</td> <td>6</td> <td>0</td> </tr> <tr>  <td><b>7</b></td> <td>true</td>
+ * <td>false</td> <td>subdivide_tukey(2)</td> <td>12</td> <td>0</td>
+ * <td>false</td> <td>false</td> <td>false</td> <td>0</td> <td>6</td> <td>0</td>
+ * </tr> <tr>  <td><b>8</b></td> <td>true</td>  <td>false</td>
+ * <td>subdivide_tukey(2)</td> <td>12</td> <td>0</td> <td>false</td>
+ * <td>false</td> <td>false</td> <td>0</td> <td>6</td> <td>0</td> </tr>
  * </table>
  *
  * \default \c 5
@@ -862,7 +901,9 @@
  * \retval FLAC__bool
  *    \c false if the encoder is already initialized, else \c true.
  */
-FLAC_API FLAC__bool FLAC__stream_encoder_set_compression_level(FLAC__StreamEncoder *encoder, unsigned value);
+FLAC_API FLAC__bool
+FLAC__stream_encoder_set_compression_level(FLAC__StreamEncoder* encoder,
+                                           uint32_t value);
 
 /** Set the blocksize to use while encoding.
  *
@@ -877,13 +918,15 @@
  * \retval FLAC__bool
  *    \c false if the encoder is already initialized, else \c true.
  */
-FLAC_API FLAC__bool FLAC__stream_encoder_set_blocksize(FLAC__StreamEncoder *encoder, unsigned value);
+FLAC_API FLAC__bool
+FLAC__stream_encoder_set_blocksize(FLAC__StreamEncoder* encoder,
+                                   uint32_t value);
 
 /** Set to \c true to enable mid-side encoding on stereo input.  The
  *  number of channels must be 2 for this to have any effect.  Set to
  *  \c false to use only independent channel coding.
  *
- * \default \c false
+ * \default \c true
  * \param  encoder  An encoder instance to set.
  * \param  value    Flag value (see above).
  * \assert
@@ -921,7 +964,7 @@
  * \c blackman, \c blackman_harris_4term_92db, \c connes, \c flattop,
  * \c gauss(STDDEV), \c hamming, \c hann, \c kaiser_bessel, \c nuttall,
  * \c rectangle, \c triangle, \c tukey(P), \c partial_tukey(n[/ov[/P]]),
- * \c punchout_tukey(n[/ov[/P]]), \c welch.
+ * \c punchout_tukey(n[/ov[/P]]), \c subdivide_tukey(n[/P]), \c welch.
  *
  * For \c gauss(STDDEV), STDDEV specifies the standard deviation
  * (0<STDDEV<=0.5).
@@ -948,6 +991,20 @@
  * and partial_tukey(3/0.3/0.5) are all valid. ov should be smaller than 1
  * and can be negative.
  *
+ * subdivide_tukey(n) is a more efficient reimplementation of
+ * partial_tukey and punchout_tukey taken together, recycling as much data
+ * as possible. It combines all possible non-redundant partial_tukey(n)
+ * and punchout_tukey(n) up to the n specified. Specifying
+ * subdivide_tukey(3) is equivalent to specifying tukey, partial_tukey(2),
+ * partial_tukey(3) and punchout_tukey(3), specifying subdivide_tukey(5)
+ * equivalently adds partial_tukey(4), punchout_tukey(4), partial_tukey(5)
+ * and punchout_tukey(5). To be able to reuse data as much as possible,
+ * the tukey taper is taken equal for all windows, and the P specified is
+ * applied for the smallest used window. In other words,
+ * subdivide_tukey(2/0.5) results in a taper equal to that of tukey(0.25)
+ * and subdivide_tukey(5) in a taper equal to that of tukey(0.1). The
+ * default P for subdivide_tukey when none is specified is 0.5.
+ *
  * Example specifications are \c "blackman" or
  * \c "hann;triangle;tukey(0.5);tukey(0.25);tukey(0.125)"
  *
@@ -963,6 +1020,8 @@
  * floating point array in which to store the window. Also note that the
  * values of P, STDDEV and ov are locale-specific, so if the comma
  * separator specified by the locale is a comma, a comma should be used.
+ * A locale-independent way is to specify using scientific notation,
+ * e.g. 5e-1 instead of 0.5 or 0,5.
  *
  * \default \c "tukey(0.5)"
  * \param  encoder        An encoder instance to set.
@@ -977,7 +1036,7 @@
 
 /** Set the maximum LPC order, or \c 0 to use only the fixed predictors.
  *
- * \default \c 0
+ * \default \c 8
  * \param  encoder  An encoder instance to set.
  * \param  value    See above.
  * \assert
@@ -985,16 +1044,14 @@
  * \retval FLAC__bool
  *    \c false if the encoder is already initialized, else \c true.
  */
-FLAC_API FLAC__bool FLAC__stream_encoder_set_max_lpc_order(FLAC__StreamEncoder *encoder, unsigned value);
+FLAC_API FLAC__bool
+FLAC__stream_encoder_set_max_lpc_order(FLAC__StreamEncoder* encoder,
+                                       uint32_t value);
 
 /** Set the precision, in bits, of the quantized linear predictor
  *  coefficients, or \c 0 to let the encoder select it based on the
  *  blocksize.
  *
- * \note
- * In the current implementation, qlp_coeff_precision + bits_per_sample must
- * be less than 32.
- *
  * \default \c 0
  * \param  encoder  An encoder instance to set.
  * \param  value    See above.
@@ -1003,7 +1060,9 @@
  * \retval FLAC__bool
  *    \c false if the encoder is already initialized, else \c true.
  */
-FLAC_API FLAC__bool FLAC__stream_encoder_set_qlp_coeff_precision(FLAC__StreamEncoder *encoder, unsigned value);
+FLAC_API FLAC__bool
+FLAC__stream_encoder_set_qlp_coeff_precision(FLAC__StreamEncoder* encoder,
+                                             uint32_t value);
 
 /** Set to \c false to use only the specified quantized linear predictor
  *  coefficient precision, or \c true to search neighboring precision
@@ -1066,7 +1125,9 @@
  * \retval FLAC__bool
  *    \c false if the encoder is already initialized, else \c true.
  */
-FLAC_API FLAC__bool FLAC__stream_encoder_set_min_residual_partition_order(FLAC__StreamEncoder *encoder, unsigned value);
+FLAC_API FLAC__bool FLAC__stream_encoder_set_min_residual_partition_order(
+    FLAC__StreamEncoder* encoder,
+    uint32_t value);
 
 /** Set the maximum partition order to search when coding the residual.
  *  This is used in tandem with
@@ -1081,7 +1142,7 @@
  *  all orders, using the mean of each context for its Rice parameter,
  *  and use the best.
  *
- * \default \c 0
+ * \default \c 5
  * \param  encoder  An encoder instance to set.
  * \param  value    See above.
  * \assert
@@ -1089,7 +1150,9 @@
  * \retval FLAC__bool
  *    \c false if the encoder is already initialized, else \c true.
  */
-FLAC_API FLAC__bool FLAC__stream_encoder_set_max_residual_partition_order(FLAC__StreamEncoder *encoder, unsigned value);
+FLAC_API FLAC__bool FLAC__stream_encoder_set_max_residual_partition_order(
+    FLAC__StreamEncoder* encoder,
+    uint32_t value);
 
 /** Deprecated.  Setting this value has no effect.
  *
@@ -1101,7 +1164,9 @@
  * \retval FLAC__bool
  *    \c false if the encoder is already initialized, else \c true.
  */
-FLAC_API FLAC__bool FLAC__stream_encoder_set_rice_parameter_search_dist(FLAC__StreamEncoder *encoder, unsigned value);
+FLAC_API FLAC__bool FLAC__stream_encoder_set_rice_parameter_search_dist(
+    FLAC__StreamEncoder* encoder,
+    uint32_t value);
 
 /** Set an estimate of the total samples that will be encoded.
  *  This is merely an estimate and may be set to \c 0 if unknown.
@@ -1200,7 +1265,30 @@
  *    \c false if the encoder is already initialized, or if
  *    \a num_blocks > 65535 if encoding to Ogg FLAC, else \c true.
  */
-FLAC_API FLAC__bool FLAC__stream_encoder_set_metadata(FLAC__StreamEncoder *encoder, FLAC__StreamMetadata **metadata, unsigned num_blocks);
+FLAC_API FLAC__bool
+FLAC__stream_encoder_set_metadata(FLAC__StreamEncoder* encoder,
+                                  FLAC__StreamMetadata** metadata,
+                                  uint32_t num_blocks);
+
+/** Set to \c true to make the encoder not output frames which contain
+ *  only constant subframes. This is beneficial for streaming
+ *  applications: very small frames can cause problems with buffering
+ *  as bitrates can drop as low as 1kbit/s for CDDA audio encoded within
+ *  subset. The minimum bitrate for a FLAC file encoded with this
+ *  option enabled is raised to 1bit/sample (i.e. 48kbit/s for 48kHz
+ *  material).
+ *
+ * \default \c false
+ * \param  encoder  An encoder instance to set.
+ * \param  value    Flag value (see above).
+ * \assert
+ *    \code encoder != NULL \endcode
+ * \retval FLAC__bool
+ *    \c false if the encoder is already initialized, else \c true.
+ */
+FLAC_API FLAC__bool
+FLAC__stream_encoder_set_limit_min_bitrate(FLAC__StreamEncoder* encoder,
+                                           FLAC__bool value);
 
 /** Get the current encoder state.
  *
@@ -1254,7 +1342,14 @@
  * \assert
  *    \code encoder != NULL \endcode
  */
-FLAC_API void FLAC__stream_encoder_get_verify_decoder_error_stats(const FLAC__StreamEncoder *encoder, FLAC__uint64 *absolute_sample, unsigned *frame_number, unsigned *channel, unsigned *sample, FLAC__int32 *expected, FLAC__int32 *got);
+FLAC_API void FLAC__stream_encoder_get_verify_decoder_error_stats(
+    const FLAC__StreamEncoder* encoder,
+    FLAC__uint64* absolute_sample,
+    uint32_t* frame_number,
+    uint32_t* channel,
+    uint32_t* sample,
+    FLAC__int32* expected,
+    FLAC__int32* got);
 
 /** Get the "verify" flag.
  *
@@ -1266,7 +1361,7 @@
  */
 FLAC_API FLAC__bool FLAC__stream_encoder_get_verify(const FLAC__StreamEncoder *encoder);
 
-/** Get the <A HREF="../format.html#subset>Subset</A> flag.
+/** Get the <A HREF="https://xiph.org/flac/format.html#subset">Subset</A> flag.
  *
  * \param  encoder  An encoder instance to query.
  * \assert
@@ -1281,40 +1376,44 @@
  * \param  encoder  An encoder instance to query.
  * \assert
  *    \code encoder != NULL \endcode
- * \retval unsigned
+ * \retval uint32_t
  *    See FLAC__stream_encoder_set_channels().
  */
-FLAC_API unsigned FLAC__stream_encoder_get_channels(const FLAC__StreamEncoder *encoder);
+FLAC_API uint32_t
+FLAC__stream_encoder_get_channels(const FLAC__StreamEncoder* encoder);
 
 /** Get the input sample resolution setting.
  *
  * \param  encoder  An encoder instance to query.
  * \assert
  *    \code encoder != NULL \endcode
- * \retval unsigned
+ * \retval uint32_t
  *    See FLAC__stream_encoder_set_bits_per_sample().
  */
-FLAC_API unsigned FLAC__stream_encoder_get_bits_per_sample(const FLAC__StreamEncoder *encoder);
+FLAC_API uint32_t
+FLAC__stream_encoder_get_bits_per_sample(const FLAC__StreamEncoder* encoder);
 
 /** Get the input sample rate setting.
  *
  * \param  encoder  An encoder instance to query.
  * \assert
  *    \code encoder != NULL \endcode
- * \retval unsigned
+ * \retval uint32_t
  *    See FLAC__stream_encoder_set_sample_rate().
  */
-FLAC_API unsigned FLAC__stream_encoder_get_sample_rate(const FLAC__StreamEncoder *encoder);
+FLAC_API uint32_t
+FLAC__stream_encoder_get_sample_rate(const FLAC__StreamEncoder* encoder);
 
 /** Get the blocksize setting.
  *
  * \param  encoder  An encoder instance to query.
  * \assert
  *    \code encoder != NULL \endcode
- * \retval unsigned
+ * \retval uint32_t
  *    See FLAC__stream_encoder_set_blocksize().
  */
-FLAC_API unsigned FLAC__stream_encoder_get_blocksize(const FLAC__StreamEncoder *encoder);
+FLAC_API uint32_t
+FLAC__stream_encoder_get_blocksize(const FLAC__StreamEncoder* encoder);
 
 /** Get the "mid/side stereo coding" flag.
  *
@@ -1341,20 +1440,22 @@
  * \param  encoder  An encoder instance to query.
  * \assert
  *    \code encoder != NULL \endcode
- * \retval unsigned
+ * \retval uint32_t
  *    See FLAC__stream_encoder_set_max_lpc_order().
  */
-FLAC_API unsigned FLAC__stream_encoder_get_max_lpc_order(const FLAC__StreamEncoder *encoder);
+FLAC_API uint32_t
+FLAC__stream_encoder_get_max_lpc_order(const FLAC__StreamEncoder* encoder);
 
 /** Get the quantized linear predictor coefficient precision setting.
  *
  * \param  encoder  An encoder instance to query.
  * \assert
  *    \code encoder != NULL \endcode
- * \retval unsigned
+ * \retval uint32_t
  *    See FLAC__stream_encoder_set_qlp_coeff_precision().
  */
-FLAC_API unsigned FLAC__stream_encoder_get_qlp_coeff_precision(const FLAC__StreamEncoder *encoder);
+FLAC_API uint32_t FLAC__stream_encoder_get_qlp_coeff_precision(
+    const FLAC__StreamEncoder* encoder);
 
 /** Get the qlp coefficient precision search flag.
  *
@@ -1391,30 +1492,33 @@
  * \param  encoder  An encoder instance to query.
  * \assert
  *    \code encoder != NULL \endcode
- * \retval unsigned
+ * \retval uint32_t
  *    See FLAC__stream_encoder_set_min_residual_partition_order().
  */
-FLAC_API unsigned FLAC__stream_encoder_get_min_residual_partition_order(const FLAC__StreamEncoder *encoder);
+FLAC_API uint32_t FLAC__stream_encoder_get_min_residual_partition_order(
+    const FLAC__StreamEncoder* encoder);
 
 /** Get maximum residual partition order setting.
  *
  * \param  encoder  An encoder instance to query.
  * \assert
  *    \code encoder != NULL \endcode
- * \retval unsigned
+ * \retval uint32_t
  *    See FLAC__stream_encoder_set_max_residual_partition_order().
  */
-FLAC_API unsigned FLAC__stream_encoder_get_max_residual_partition_order(const FLAC__StreamEncoder *encoder);
+FLAC_API uint32_t FLAC__stream_encoder_get_max_residual_partition_order(
+    const FLAC__StreamEncoder* encoder);
 
 /** Get the Rice parameter search distance setting.
  *
  * \param  encoder  An encoder instance to query.
  * \assert
  *    \code encoder != NULL \endcode
- * \retval unsigned
+ * \retval uint32_t
  *    See FLAC__stream_encoder_set_rice_parameter_search_dist().
  */
-FLAC_API unsigned FLAC__stream_encoder_get_rice_parameter_search_dist(const FLAC__StreamEncoder *encoder);
+FLAC_API uint32_t FLAC__stream_encoder_get_rice_parameter_search_dist(
+    const FLAC__StreamEncoder* encoder);
 
 /** Get the previously set estimate of the total samples to be encoded.
  *  The encoder merely mimics back the value given to
@@ -1429,6 +1533,17 @@
  */
 FLAC_API FLAC__uint64 FLAC__stream_encoder_get_total_samples_estimate(const FLAC__StreamEncoder *encoder);
 
+/** Get the "limit_min_bitrate" flag.
+ *
+ * \param  encoder  An encoder instance to query.
+ * \assert
+ *    \code encoder != NULL \endcode
+ * \retval FLAC__bool
+ *    See FLAC__stream_encoder_set_limit_min_bitrate().
+ */
+FLAC_API FLAC__bool
+FLAC__stream_encoder_get_limit_min_bitrate(const FLAC__StreamEncoder* encoder);
+
 /** Initialize the encoder instance to encode native FLAC streams.
  *
  *  This flavor of initialization sets up the encoder to encode to a
@@ -1633,11 +1748,15 @@
 /** Initialize the encoder instance to encode native FLAC files.
  *
  *  This flavor of initialization sets up the encoder to encode to a plain
- *  FLAC file.  If POSIX fopen() semantics are not sufficient (for example,
- *  with Unicode filenames on Windows), you must use
+ *  FLAC file.  If POSIX fopen() semantics are not sufficient you must use
  *  FLAC__stream_encoder_init_FILE(), or FLAC__stream_encoder_init_stream()
  *  and provide callbacks for the I/O.
  *
+ *  On Windows, filename must be a UTF-8 encoded filename, which libFLAC
+ *  internally translates to an appropriate representation to use with
+ *  _wfopen. On all other systems, filename is passed to fopen without
+ *  any translation.
+ *
  *  This function should be called after FLAC__stream_encoder_new() and
  *  FLAC__stream_encoder_set_*() but before FLAC__stream_encoder_process()
  *  or FLAC__stream_encoder_process_interleaved().
@@ -1665,10 +1784,14 @@
 /** Initialize the encoder instance to encode Ogg FLAC files.
  *
  *  This flavor of initialization sets up the encoder to encode to a plain
- *  Ogg FLAC file.  If POSIX fopen() semantics are not sufficient (for example,
- *  with Unicode filenames on Windows), you must use
- *  FLAC__stream_encoder_init_ogg_FILE(), or FLAC__stream_encoder_init_ogg_stream()
- *  and provide callbacks for the I/O.
+ *  Ogg FLAC file.  If POSIX fopen() semantics are not sufficient, you must use
+ *  FLAC__stream_encoder_init_ogg_FILE(), or
+ * FLAC__stream_encoder_init_ogg_stream() and provide callbacks for the I/O.
+ *
+ *  On Windows, filename must be a UTF-8 encoded filename, which libFLAC
+ *  internally translates to an appropriate representation to use with
+ *  _wfopen. On all other systems, filename is passed to fopen without
+ *  any translation.
  *
  *  This function should be called after FLAC__stream_encoder_new() and
  *  FLAC__stream_encoder_set_*() but before FLAC__stream_encoder_process()
@@ -1734,20 +1857,22 @@
  *
  *  For applications where channel order is important, channels must
  *  follow the order as described in the
- *  <A HREF="../format.html#frame_header">frame header</A>.
+ *  <A HREF="https://xiph.org/flac/format.html#frame_header">frame header</A>.
  *
  * \param  encoder  An initialized encoder instance in the OK state.
  * \param  buffer   An array of pointers to each channel's signal.
  * \param  samples  The number of samples in one channel.
  * \assert
  *    \code encoder != NULL \endcode
- *    \code FLAC__stream_encoder_get_state(encoder) == FLAC__STREAM_ENCODER_OK \endcode
- * \retval FLAC__bool
- *    \c true if successful, else \c false; in this case, check the
- *    encoder state with FLAC__stream_encoder_get_state() to see what
- *    went wrong.
+ *    \code FLAC__stream_encoder_get_state(encoder) == FLAC__STREAM_ENCODER_OK \endcode
+ * \retval FLAC__bool
+ *    \c true if successful, else \c false; in this case, check the encoder
+ *    state with FLAC__stream_encoder_get_state() to see what went wrong.
  */
-FLAC_API FLAC__bool FLAC__stream_encoder_process(FLAC__StreamEncoder *encoder, const FLAC__int32 * const buffer[], unsigned samples);
+FLAC_API FLAC__bool
+FLAC__stream_encoder_process(FLAC__StreamEncoder* encoder,
+                             const FLAC__int32* const buffer[],
+                             uint32_t samples);
 
 /** Submit data for encoding.
  *  This version allows you to supply the input data where the channels
@@ -1763,7 +1888,7 @@
  *
  *  For applications where channel order is important, channels must
  *  follow the order as described in the
- *  <A HREF="../format.html#frame_header">frame header</A>.
+ *  <A HREF="https://xiph.org/flac/format.html#frame_header">frame header</A>.
  *
  * \param  encoder  An initialized encoder instance in the OK state.
  * \param  buffer   An array of channel-interleaved data (see above).
@@ -1773,13 +1898,15 @@
  *                  to a \a buffer of 2000 values.
  * \assert
  *    \code encoder != NULL \endcode
- *    \code FLAC__stream_encoder_get_state(encoder) == FLAC__STREAM_ENCODER_OK \endcode
- * \retval FLAC__bool
- *    \c true if successful, else \c false; in this case, check the
- *    encoder state with FLAC__stream_encoder_get_state() to see what
- *    went wrong.
+ *    \code FLAC__stream_encoder_get_state(encoder) == FLAC__STREAM_ENCODER_OK \endcode
+ * \retval FLAC__bool
+ *    \c true if successful, else \c false; in this case, check the encoder
+ *    state with FLAC__stream_encoder_get_state() to see what went wrong.
  */
-FLAC_API FLAC__bool FLAC__stream_encoder_process_interleaved(FLAC__StreamEncoder *encoder, const FLAC__int32 buffer[], unsigned samples);
+FLAC_API FLAC__bool
+FLAC__stream_encoder_process_interleaved(FLAC__StreamEncoder* encoder,
+                                         const FLAC__int32 buffer[],
+                                         uint32_t samples);
 
 /* \} */
 

diff --git a/include/share/alloc.h b/include/share/alloc.h
index 2a6ac39..6ef79e4 100644
--- a/include/share/alloc.h
+++ b/include/share/alloc.h

@@ -1,20 +1,365 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
+/* alloc - Convenience routines for safely allocating memory
+ * Copyright (C) 2007-2009  Josh Coalson
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * - Neither the name of the Xiph.org Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
 
-#ifndef THIRD_PARTY_FLAC_INCLUDE_SHARE_ALLOC_H
-#define THIRD_PARTY_FLAC_INCLUDE_SHARE_ALLOC_H
+#ifndef FLAC__SHARE__ALLOC_H
+#define FLAC__SHARE__ALLOC_H
 
-void *safe_malloc_(size_t size);
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
 
-void *safe_calloc_(size_t num_items, size_t size);
+/* WATCHOUT: for c++ you may have to #define __STDC_LIMIT_MACROS 1 real early
+ * before #including this file,  otherwise SIZE_MAX might not be defined
+ */
 
-void *safe_malloc_add_2op_(size_t size1, size_t size2);
+#include <limits.h> /* for SIZE_MAX */
+#ifdef HAVE_STDINT_H
+#include <stdint.h> /* for SIZE_MAX in case limits.h didn't get it */
+#endif
+#include <stdlib.h> /* for size_t, malloc(), etc */
+#include "share/compat.h"
 
-void *safe_malloc_mul_2op_(size_t size1, size_t size2);
+#ifndef SIZE_MAX
+#ifndef SIZE_T_MAX
+#ifdef _MSC_VER
+#ifdef _WIN64
+#define SIZE_T_MAX FLAC__U64L(0xffffffffffffffff)
+#else
+#define SIZE_T_MAX 0xffffffff
+#endif
+#else
+#error
+#endif
+#endif
+#define SIZE_MAX SIZE_T_MAX
+#endif
 
-void *safe_malloc_muladd2_(size_t size1, size_t size2, size_t size3);
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+extern int alloc_check_threshold, alloc_check_counter; /* defined elsewhere */
 
-void *safe_realloc_mul_2op_(void *ptr, size_t size1, size_t size2);
+static inline int alloc_check(void) { /* returns 1 when a failure must be faked */
+  if (alloc_check_threshold == INT32_MAX) {
+    return 0; /* INT32_MAX threshold: never fake a failure, counter untouched */
+  } else if (alloc_check_counter++ == alloc_check_threshold) {
+    return 1; /* counter (before increment) has reached the threshold */
+  } else {
+    return 0;
+  }
+}
 
-#endif  // THIRD_PARTY_FLAC_INCLUDE_SHARE_ALLOC_H
+#endif
+
+/* avoid malloc()ing 0 bytes, see:
+ * https://www.securecoding.cert.org/confluence/display/seccode/MEM04-A.+Do+not+make+assumptions+about+the+result+of+allocating+0+bytes?focusedCommentId=5407003
+ */
+
+static inline void* safe_malloc_(size_t size) { /* malloc() of at least 1 byte */
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+  /* Fuzzing builds only: fail if alloc_check() requests it */
+  if (alloc_check()) {
+    return NULL;
+  }
+#endif
+  /* malloc(0) is implementation-defined; FLAC src convention is to always allocate */
+  if (!size) {
+    size++;
+  }
+  return malloc(size);
+}
+
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+static inline void* malloc_(size_t size) { /* malloc() with fault injection */
+  /* Fail if requested */
+  if (alloc_check()) {
+    return NULL;
+  }
+  return malloc(size);
+}
+#else
+#define malloc_ malloc /* outside fuzzing builds this is plain malloc() */
+#endif
+
+static inline void* safe_calloc_(size_t nmemb, size_t size) { /* calloc() that never requests 0 bytes */
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+  /* Fail if requested */
+  if (alloc_check()) {
+    return NULL;
+  }
+#endif
+  if (!nmemb || !size) {
+    return malloc(1); /* zero-sized request: FLAC src convention is to always
+                         allocate; this byte is from malloc(), not zeroed */
+  }
+  return calloc(nmemb, size);
+}
+
+/*@@@@ there's probably a better way to prevent overflows when allocating
+ * untrusted sums but this works for now */
+
+static inline void* safe_malloc_add_2op_(size_t size1, size_t size2) {
+  /* Allocates size1 + size2 bytes via safe_malloc_(). size_t addition wraps,
+   * so a total smaller than an operand means overflow: NULL is returned. */
+  const size_t total = size1 + size2;
+
+  return (total < size1) ? NULL : safe_malloc_(total);
+}
+
+static inline void* safe_malloc_add_3op_(size_t size1,
+                                         size_t size2,
+                                         size_t size3) {
+  /* Allocates size1 + size2 + size3 bytes via safe_malloc_(). Each partial
+   * sum is compared with the value it grew from; a wrapped sum is smaller,
+   * and then NULL is returned without allocating. */
+  const size_t first_two = size1 + size2;
+  const size_t all_three = first_two + size3;
+  if (first_two < size1 || all_three < first_two) {
+    return NULL;
+  }
+  return safe_malloc_(all_three);
+}
+
+static inline void* safe_malloc_add_4op_(size_t size1,
+                                         size_t size2,
+                                         size_t size3,
+                                         size_t size4) {
+  /* Allocates size1 + size2 + size3 + size4 bytes via safe_malloc_().
+   * size_t arithmetic wraps instead of trapping, so a running sum that is
+   * smaller than the sum before it means the true total does not fit; NULL
+   * is returned in that case and nothing is allocated. */
+  const size_t sum2 = size1 + size2;
+  const size_t sum3 = sum2 + size3;
+  const size_t sum4 = sum3 + size4;
+  const int wrapped = sum2 < size1 || sum3 < sum2 || sum4 < sum3;
+
+  if (wrapped) {
+    return NULL;
+  }
+  return safe_malloc_(sum4);
+}
+
+void* safe_malloc_mul_2op_(size_t size1, size_t size2);
+
+static inline void* safe_malloc_mul_3op_(size_t size1,
+                                         size_t size2,
+                                         size_t size3) {
+  /* Allocates size1 * size2 * size3 bytes, or returns NULL when that product
+   * does not fit in a size_t. */
+  if (size1 == 0 || size2 == 0 || size3 == 0) {
+    return malloc(1); /* empty request: still hand back a 1-byte block */
+  }
+  /* Test each multiplication by dividing first; the second test only runs
+   * once size1 * size2 is known not to overflow. */
+  if (size1 > SIZE_MAX / size2 ||
+      size1 * size2 > SIZE_MAX / size3) {
+    return NULL;
+  }
+  return malloc_(size1 * size2 * size3);
+}
+
+/* Allocates (size1 * size2) + size3 bytes; NULL if that overflows size_t. */
+static inline void* safe_malloc_mul2add_(size_t size1,
+                                         size_t size2,
+                                         size_t size3) {
+  if (size1 == 0 || size2 == 0) {
+    /* The product is zero, so only size3 bytes are needed. */
+    return safe_malloc_(size3);
+  } else if (size1 > SIZE_MAX / size2) {
+    return NULL;
+  }
+  return safe_malloc_add_2op_(size1 * size2, size3);
+}
+
+/* Allocates size1 * (size2 + size3) bytes; NULL if that overflows size_t. */
+static inline void* safe_malloc_muladd2_(size_t size1,
+                                         size_t size2,
+                                         size_t size3) {
+  size_t per_item;
+  if (size1 == 0 || (size2 == 0 && size3 == 0)) {
+    /* Zero-byte request: still return a real, 1-byte allocation. */
+    return malloc(1);
+  }
+  per_item = size2 + size3;
+  /* A wrapped sum is smaller than its operands; a too-large product is
+   * detected by dividing instead of multiplying. */
+  if (per_item < size3 || size1 > SIZE_MAX / per_item) {
+    return NULL;
+  }
+  return malloc_(size1 * per_item);
+}
+
+static inline void* safe_realloc_(void* ptr, size_t size) { /* realloc() that frees ptr when a non-empty resize fails */
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+  /* Fail if requested; ptr is freed just as on the genuine failure path below */
+  if (alloc_check() && size > 0) {
+    free(ptr);
+    return NULL;
+  }
+#endif
+  void* oldptr = ptr;
+  void* newptr = realloc(ptr, size);
+  if (size > 0 && newptr == 0) { /* failed resize: realloc() left the old block allocated */
+    free(oldptr); /* release it here, so for size > 0 a NULL return means ptr is gone */
+  }
+  return newptr;
+}
+
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+static inline void* realloc_(void* ptr, size_t size) { /* realloc() with fault injection */
+  /* Fail if requested; ptr is not freed here, unlike in safe_realloc_() */
+  if (alloc_check()) {
+    return NULL;
+  }
+  return realloc(ptr, size);
+}
+#else
+#define realloc_ realloc /* outside fuzzing builds this is plain realloc() */
+#endif
+
+static inline void* safe_realloc_nofree_add_2op_(void* ptr,
+                                                 size_t size1,
+                                                 size_t size2) {
+  /* Resize ptr to size1 + size2 bytes via realloc_(). If the sum wraps
+   * around, NULL is returned and ptr is left untouched. */
+  const size_t total = size1 + size2;
+
+  return (total < size1) ? NULL : realloc_(ptr, total);
+}
+
+static inline void* safe_realloc_add_3op_(void* ptr,
+                                          size_t size1,
+                                          size_t size2,
+                                          size_t size3) {
+  /* Resize ptr to size1 + size2 + size3 bytes. When a partial sum wraps
+   * around, ptr is released and NULL is returned. */
+  const size_t first_two = size1 + size2;
+  const size_t all_three = first_two + size3;
+
+  if (first_two < size1 || all_three < first_two) {
+    free(ptr);
+    return NULL;
+  }
+  /* safe_realloc_() also frees ptr when a non-empty resize fails. */
+  return safe_realloc_(ptr, all_three);
+}
+
+static inline void* safe_realloc_nofree_add_3op_(void* ptr,
+                                                 size_t size1,
+                                                 size_t size2,
+                                                 size_t size3) {
+  /* Resize ptr to size1 + size2 + size3 bytes via realloc_(); if a partial
+   * sum wraps around, return NULL and leave ptr alone. */
+  const size_t first_two = size1 + size2;
+  const size_t all_three = first_two + size3;
+
+  if (first_two < size1 || all_three < first_two) {
+    return NULL;
+  }
+  return realloc_(ptr, all_three);
+}
+
+static inline void* safe_realloc_nofree_add_4op_(void* ptr,
+                                                 size_t size1,
+                                                 size_t size2,
+                                                 size_t size3,
+                                                 size_t size4) {
+  /* Resize ptr to size1 + size2 + size3 + size4 bytes via realloc_().
+   * size_t arithmetic wraps instead of trapping, so a running sum that is
+   * smaller than the sum before it means the true total does not fit; NULL
+   * is returned in that case and ptr is left untouched. */
+  const size_t sum2 = size1 + size2;
+  const size_t sum3 = sum2 + size3;
+  const size_t sum4 = sum3 + size4;
+  const int wrapped = sum2 < size1 || sum3 < sum2 || sum4 < sum3;
+
+  if (wrapped) {
+    return NULL;
+  }
+  return realloc_(ptr, sum4);
+}
+
+static inline void* safe_realloc_mul_2op_(void* ptr,
+                                          size_t size1,
+                                          size_t size2) {
+  /* Resize ptr to size1 * size2 bytes; frees ptr when that overflows. */
+  if (size1 == 0 || size2 == 0) {
+    return realloc(ptr, 0); /* preserve POSIX realloc(ptr, 0) semantics */
+  } else if (size1 > SIZE_MAX / size2) {
+    free(ptr);
+    return NULL;
+  }
+  return safe_realloc_(ptr, size1 * size2);
+}
+
+static inline void* safe_realloc_nofree_mul_2op_(void* ptr,
+                                                 size_t size1,
+                                                 size_t size2) {
+  /* Resize ptr to size1 * size2 bytes; on overflow ptr is left untouched. */
+  if (size1 == 0 || size2 == 0) {
+    return realloc(ptr, 0); /* preserve POSIX realloc(ptr, 0) semantics */
+  } else if (size1 > SIZE_MAX / size2) {
+    return NULL;
+  }
+  return realloc_(ptr, size1 * size2);
+}
+
+/* Resize ptr to size1 * (size2 + size3) bytes, freeing ptr on overflow. */
+static inline void* safe_realloc_muladd2_(void* ptr,
+                                          size_t size1,
+                                          size_t size2,
+                                          size_t size3) {
+  const size_t per_item = size2 + size3;
+
+  if (size1 == 0 || (size2 == 0 && size3 == 0)) {
+    return realloc(ptr, 0); /* preserve POSIX realloc(ptr, 0) semantics */
+  } else if (per_item < size3) {
+    free(ptr);
+    return NULL;
+  }
+  return safe_realloc_mul_2op_(ptr, size1, per_item);
+}
+
+/* Resize ptr to size1 * (size2 + size3) bytes; ptr is kept on overflow. */
+static inline void* safe_realloc_nofree_muladd2_(void* ptr,
+                                                 size_t size1,
+                                                 size_t size2,
+                                                 size_t size3) {
+  const size_t per_item = size2 + size3;
+
+  if (size1 == 0 || (size2 == 0 && size3 == 0)) {
+    return realloc(ptr, 0); /* preserve POSIX realloc(ptr, 0) semantics */
+  } else if (per_item < size3) {
+    return NULL;
+  }
+  return safe_realloc_nofree_mul_2op_(ptr, size1, per_item);
+}
+
+#endif

diff --git a/include/share/compat.h b/include/share/compat.h
index b158e06..d9ab51d 100644
--- a/include/share/compat.h
+++ b/include/share/compat.h

@@ -1,5 +1,5 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2012-2014  Xiph.org Foundation
+ * Copyright (C) 2012-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -29,7 +29,7 @@
  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
-/* This is the prefered location of all CPP hackery to make $random_compiler
+/* This is the preferred location of all CPP hackery to make $random_compiler
  * work like something approaching a C99 (or maybe more accurately GNU99)
  * compiler.
  *
@@ -39,6 +39,9 @@
 #ifndef FLAC__SHARE__COMPAT_H
 #define FLAC__SHARE__COMPAT_H
 
+#include <stdarg.h>
+#include <stddef.h>
+
 #if defined _WIN32 && !defined __CYGWIN__
 /* where MSVC puts unlink() */
 # include <io.h>
@@ -62,7 +65,7 @@
 #define FLAC__off_t off_t
 #endif
 
-#if HAVE_INTTYPES_H
+#ifdef HAVE_INTTYPES_H
 #define __STDC_FORMAT_MACROS
 #include <inttypes.h>
 #endif
@@ -72,7 +75,7 @@
 #define strtoull _strtoui64
 #endif
 
-#if defined(_MSC_VER)
+#if defined(_MSC_VER) && !defined(__cplusplus)
 #define inline __inline
 #endif
 
@@ -87,7 +90,10 @@
 
 #define FLAC__U64L(x) x##ULL
 
-#if defined _MSC_VER || defined __BORLANDC__ || defined __MINGW32__
+#if defined _MSC_VER || defined __MINGW32__
+#define FLAC__STRCASECMP _stricmp
+#define FLAC__STRNCASECMP _strnicmp
+#elif defined __BORLANDC__
 #define FLAC__STRCASECMP stricmp
 #define FLAC__STRNCASECMP strnicmp
 #else
@@ -95,7 +101,7 @@
 #define FLAC__STRNCASECMP strncasecmp
 #endif
 
-#if defined _MSC_VER || defined __MINGW32__ || defined __CYGWIN__ || defined __EMX__
+#if defined _MSC_VER || defined __MINGW32__ || defined __EMX__
 #include <io.h> /* for _setmode(), chmod() */
 #include <fcntl.h> /* for _O_BINARY */
 #else
@@ -109,12 +115,18 @@
 #include <sys/utime.h> /* for utime() */
 #endif
 #else
+#if defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200809L)
+#include <fcntl.h>
+#else
 #include <sys/types.h> /* some flavors of BSD (like OS X) require this to get time_t */
 #include <utime.h> /* for utime() */
 #endif
+#endif
 
 #if defined _MSC_VER
-#  if _MSC_VER >= 1600
+#if _MSC_VER >= 1800
+#include <inttypes.h>
+#elif _MSC_VER >= 1600
 /* Visual Studio 2010 has decent C99 support */
 #    include <stdint.h>
 #    define PRIu64 "llu"
@@ -124,25 +136,45 @@
 #    include <limits.h>
 #    ifndef UINT32_MAX
 #      define UINT32_MAX _UI32_MAX
-#    endif
-     typedef unsigned __int64 uint64_t;
-     typedef unsigned __int32 uint32_t;
-     typedef unsigned __int16 uint16_t;
-     typedef unsigned __int8 uint8_t;
-     typedef __int64 int64_t;
-     typedef __int32 int32_t;
-     typedef __int16 int16_t;
-     typedef __int8  int8_t;
+#endif
 #    define PRIu64 "I64u"
 #    define PRId64 "I64d"
 #    define PRIx64 "I64x"
 #  endif
+#if defined(_USING_V110_SDK71_) && !defined(_DLL)
+#pragma message( \
+        "WARNING: This compile will NOT FUNCTION PROPERLY on Windows XP. See comments in include/share/compat.h for details")
+#define FLAC__USE_FILELENGTHI64
+/*
+ *************************************************************************************
+ * V110_SDK71, in MSVC 2017 also known as v141_xp, is a platform toolset that is
+ * supposed to target Windows XP. It turns out however that certain functions
+ * provided silently fail on Windows XP only, which makes debugging challenging.
+ * This only occurs when building with /MT. This problem has been reported to
+ * Microsoft, but there hasn't been a fix for years. See
+ * https://web.archive.org/web/20170327195018/https://connect.microsoft.com/VisualStudio/feedback/details/1557168/wstat64-returns-1-on-xp-always
+ *
+ * It is known that this problem affects the functions _wstat64 (used by
+ * flac_stat i.e. stat64_utf8) and _fstat64 (i.e. flac_fstat) and therefore
+ * affects both libFLAC in several places as well as the flac and metaflac
+ * command line tools.
+ *
+ * As the extent of this problem is unknown and Microsoft seems unwilling to fix
+ * it, users of libFLAC building with Visual Studio are encouraged to not use the
+ * /MT compile switch when explicitly targeting Windows XP. When use of /MT is
+ * deemed necessary with this toolset, be sure to check whether your application
+ * works properly on Windows XP. It is also possible to build for Windows XP with
+ * MinGW instead.
+ *************************************************************************************
+ */
+#endif
 #endif /* defined _MSC_VER */
 
 #ifdef _WIN32
-/* All char* strings are in UTF-8 format. Added to support Unicode files on Windows */
-#include "share/win_utf8_io.h"
+/* All char* strings are in UTF-8 format. Added to support Unicode files on
+ * Windows */
 
+#include "share/win_utf8_io.h"
 #define flac_printf printf_utf8
 #define flac_fprintf fprintf_utf8
 #define flac_vfprintf vfprintf_utf8
@@ -151,20 +183,25 @@
 #define flac_utime utime_utf8
 #define flac_unlink unlink_utf8
 #define flac_rename rename_utf8
-#define flac_stat _stat64_utf8
+#define flac_stat stat64_utf8
 
 #else
 
 #define flac_printf printf
 #define flac_fprintf fprintf
 #define flac_vfprintf vfprintf
+
 #define flac_fopen fopen
 #define flac_chmod chmod
-#define flac_utime utime
 #define flac_unlink unlink
 #define flac_rename rename
 #define flac_stat stat
 
+#if defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200809L) /* POSIX.1-2008: use utimensat() */
+#define flac_utime(a, b) utimensat(AT_FDCWD, (a), *(b), 0) /* args parenthesized so expression arguments expand safely; *(b) becomes the times argument */
+#else
+#define flac_utime utime /* older systems: plain utime(path, times) */
+#endif
 #endif
 
 #ifdef _WIN32
@@ -190,9 +227,8 @@
  * snprintf as well as Microsoft Visual Studio which has an non-standards
  * conformant snprint_s function.
  *
- * This function wraps the MS version to behave more like the the ISO version.
+ * This function wraps the MS version to behave more like the ISO version.
  */
-#include <stdarg.h>
 #ifdef __cplusplus
 extern "C" {
 #endif

diff --git a/include/share/endswap.h b/include/share/endswap.h
index 4fde4c1..689b475 100644
--- a/include/share/endswap.h
+++ b/include/share/endswap.h

@@ -1,5 +1,5 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2012-2014  Xiph.org Foundation
+ * Copyright (C) 2012-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -31,10 +31,10 @@
 
 /* It is assumed that this header will be included after "config.h". */
 
-#if HAVE_BSWAP32			/* GCC and Clang */
+#ifdef HAVE_BSWAP32 /* GCC and Clang */
 
 /* GCC prior to 4.8 didn't provide bswap16 on x86_64 */
-#if ! HAVE_BSWAP16
+#ifndef HAVE_BSWAP16
 static inline unsigned short __builtin_bswap16(unsigned short a)
 {
 	return (a<<8)|(a>>8);
@@ -43,11 +43,15 @@
 
 #define	ENDSWAP_16(x)		(__builtin_bswap16 (x))
 #define	ENDSWAP_32(x)		(__builtin_bswap32 (x))
+#define ENDSWAP_64(x) (__builtin_bswap64(x))
 
-#elif defined _MSC_VER		/* Windows. Apparently in <stdlib.h>. */
+#elif defined _MSC_VER /* Windows */
+
+#include <stdlib.h>
 
 #define	ENDSWAP_16(x)		(_byteswap_ushort (x))
 #define	ENDSWAP_32(x)		(_byteswap_ulong (x))
+#define ENDSWAP_64(x) (_byteswap_uint64(x))
 
 #elif defined HAVE_BYTESWAP_H		/* Linux */
 
@@ -55,16 +59,19 @@
 
 #define	ENDSWAP_16(x)		(bswap_16 (x))
 #define	ENDSWAP_32(x)		(bswap_32 (x))
+#define ENDSWAP_64(x) (bswap_64(x))
 
 #else
 
 #define	ENDSWAP_16(x)		((((x) >> 8) & 0xFF) | (((x) & 0xFF) << 8))
 #define	ENDSWAP_32(x)		((((x) >> 24) & 0xFF) | (((x) >> 8) & 0xFF00) | (((x) & 0xFF00) << 8) | (((x) & 0xFF) << 24))
+#define ENDSWAP_64(x)                             \
+        ((ENDSWAP_32(((x) >> 32) & 0xFFFFFFFF)) | \
+         (ENDSWAP_32((x)&0xFFFFFFFF) << 32)) /* generic fallback: byte-swaps each 32-bit half and exchanges them; x is expanded several times and must be wider than 32 bits for the shifts to be defined */
 
 #endif
 
-
-/* Host to little-endian byte swapping. */
+/* Host to little-endian byte swapping (for MD5 calculation) */
 #if CPU_IS_BIG_ENDIAN
 
 #define H2LE_16(x)		ENDSWAP_16 (x)

diff --git a/include/share/getopt.h b/include/share/getopt.h
new file mode 100644
index 0000000..aef82b6
--- /dev/null
+++ b/include/share/getopt.h

@@ -0,0 +1,189 @@
+/*
+        NOTE:
+        I cannot get the vanilla getopt code to work (i.e. compile only what
+        is needed and not duplicate symbols found in the standard library)
+        on all the platforms that FLAC supports.  In particular the gating
+        of code with the ELIDE_CODE #define is not accurate enough on systems
+        that are POSIX but not glibc.  If someone has a patch that works on
+        GNU/Linux, Darwin, AND Solaris please submit it on the project page:
+                https://sourceforge.net/p/flac/patches/
+
+        In the meantime I have munged the global symbols and removed gates
+        around code, while at the same time trying to touch the original as
+        little as possible.
+*/
+/* Declarations for getopt.
+   Copyright (C) 1989,90,91,92,93,94,96,97,98 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If not,
+   write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+   Boston, MA 02110-1301, USA.  */
+
+#ifndef SHARE__GETOPT_H
+#define SHARE__GETOPT_H
+
+/*[JEC] was:#ifndef __need_getopt*/
+/*[JEC] was:# define _GETOPT_H 1*/
+/*[JEC] was:#endif*/
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* For communication from `share__getopt' to the caller.
+   When `share__getopt' finds an option that takes an argument,
+   the argument value is returned here.
+   Also, when `ordering' is RETURN_IN_ORDER,
+   each non-option ARGV-element is returned here.  */
+
+extern char* share__optarg;
+
+/* Index in ARGV of the next element to be scanned.
+   This is used for communication to and from the caller
+   and for communication between successive calls to `share__getopt'.
+
+   On entry to `share__getopt', zero means this is the first call; initialize.
+
+   When `share__getopt' returns -1, this is the index of the first of the
+   non-option elements that the caller should itself scan.
+
+   Otherwise, `share__optind' communicates from one call to the next
+   how much of ARGV has been scanned so far.  */
+
+extern int share__optind;
+
+/* Callers store zero here to inhibit the error message `share__getopt' prints
+   for unrecognized options.  */
+
+extern int share__opterr;
+
+/* Set to an option character which was unrecognized.  */
+
+extern int share__optopt;
+
+/*[JEC] was:#ifndef __need_getopt */
+/* Describe the long-named options requested by the application.
+   The LONG_OPTIONS argument to share__getopt_long or share__getopt_long_only is
+   a vector of `struct share__option' terminated by an element containing a name
+   which is zero.
+
+   The field `has_arg' is:
+   share__no_argument		(or 0) if the option does not take an argument,
+   share__required_argument	(or 1) if the option requires an argument,
+   share__optional_argument 	(or 2) if the option takes an optional argument.
+
+   If the field `flag' is not NULL, it points to a variable that is set
+   to the value given in the field `val' when the option is found, but
+   left unchanged if the option is not found.
+
+   To have a long-named option do something other than set an `int' to
+   a compiled-in constant, such as set a value from `share__optarg', set the
+   option's `flag' field to zero and its `val' field to a nonzero
+   value (the equivalent single-letter option character, if there is
+   one).  For long options that have a zero `flag' field, `share__getopt'
+   returns the contents of the `val' field.  */
+
+struct share__option {
+#if defined __STDC__ && __STDC__
+  const char* name; /* long option name; a zero name terminates the option vector */
+#else
+  char* name;
+#endif
+  /* has_arg can't be an enum because some compilers complain about
+     type mismatches in all the code that assumes it is an int.  */
+  int has_arg; /* share__no_argument, share__required_argument or share__optional_argument */
+  int* flag; /* if not NULL, the pointed-to int is set to val when the option is found */
+  int val; /* stored through flag, or returned by share__getopt when flag is zero */
+};
+
+/* Names for the values of the `has_arg' field of `struct share__option'.  */
+
+#define share__no_argument 0
+#define share__required_argument 1
+#define share__optional_argument 2
+/*[JEC] was:#endif*/ /* need getopt */
+
+/* Get definitions and prototypes for functions to process the
+   arguments in ARGV (ARGC of them, minus the program name) for
+   options given in OPTS.
+
+   Return the option character from OPTS just read.  Return -1 when
+   there are no more options.  For unrecognized options, or options
+   missing arguments, `share__optopt' is set to the option letter, and '?' is
+   returned.
+
+   The OPTS string is a list of characters which are recognized option
+   letters, optionally followed by colons, specifying that that letter
+   takes an argument, to be placed in `share__optarg'.
+
+   If a letter in OPTS is followed by two colons, its argument is
+   optional.  This behavior is specific to the GNU `share__getopt'.
+
+   The argument `--' causes premature termination of argument
+   scanning, explicitly telling `share__getopt' that there are no more
+   options.
+
+   If OPTS begins with `--', then non-option arguments are treated as
+   arguments to the option '\0'.  This behavior is specific to the GNU
+   `share__getopt'.  */
+
+/*[JEC] was:#if defined __STDC__ && __STDC__*/
+/*[JEC] was:# ifdef __GNU_LIBRARY__*/
+/* Many other libraries have conflicting prototypes for getopt, with
+   differences in the consts, in stdlib.h.  To avoid compilation
+   errors, only prototype getopt for the GNU C library.  */
+extern int share__getopt(int argc, char* const* argv, const char* shortopts);
+/*[JEC] was:# else*/ /* not __GNU_LIBRARY__ */
+/*[JEC] was:extern int getopt ();*/
+/*[JEC] was:# endif*/ /* __GNU_LIBRARY__ */
+
+/*[JEC] was:# ifndef __need_getopt*/
+extern int share__getopt_long(int argc,
+                              char* const* argv,
+                              const char* shortopts,
+                              const struct share__option* longopts,
+                              int* longind);
+extern int share__getopt_long_only(int argc,
+                                   char* const* argv,
+                                   const char* shortopts,
+                                   const struct share__option* longopts,
+                                   int* longind);
+
+/* Internal only.  Users should not call this directly.  */
+extern int share___getopt_internal(int argc,
+                                   char* const* argv,
+                                   const char* shortopts,
+                                   const struct share__option* longopts,
+                                   int* longind,
+                                   int long_only);
+/*[JEC] was:# endif*/
+/*[JEC] was:#else*/ /* not __STDC__ */
+/*[JEC] was:extern int getopt ();*/
+/*[JEC] was:# ifndef __need_getopt*/
+/*[JEC] was:extern int getopt_long ();*/
+/*[JEC] was:extern int getopt_long_only ();*/
+
+/*[JEC] was:extern int _getopt_internal ();*/
+/*[JEC] was:# endif*/
+/*[JEC] was:#endif*/ /* __STDC__ */
+
+#ifdef __cplusplus
+}
+#endif
+
+/* Make sure we later can get all the definitions and declarations.  */
+/*[JEC] was:#undef __need_getopt*/
+
+#endif /* getopt.h */

diff --git a/include/share/grabbag.h b/include/share/grabbag.h
new file mode 100644
index 0000000..55b6667
--- /dev/null
+++ b/include/share/grabbag.h

@@ -0,0 +1,30 @@
+/* grabbag - Convenience lib for various routines common to several tools
+ * Copyright (C) 2002-2009  Josh Coalson
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+ */
+
+#ifndef SHARE__GRABBAG_H
+#define SHARE__GRABBAG_H
+
+/* These can't be included by themselves, only from within grabbag.h */
+#include "grabbag/cuesheet.h"
+#include "grabbag/file.h"
+#include "grabbag/picture.h"
+#include "grabbag/replaygain.h"
+#include "grabbag/seektable.h"
+
+#endif

diff --git a/include/share/grabbag/cuesheet.h b/include/share/grabbag/cuesheet.h
new file mode 100644
index 0000000..ddf78f2
--- /dev/null
+++ b/include/share/grabbag/cuesheet.h

@@ -0,0 +1,55 @@
+/* grabbag - Convenience lib for various routines common to several tools
+ * Copyright (C) 2002-2009  Josh Coalson
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+ */
+
+/* This .h cannot be included by itself; #include "share/grabbag.h" instead. */
+
+#ifndef GRABBAG__CUESHEET_H
+#define GRABBAG__CUESHEET_H
+
+#include <stdio.h>
+#include "FLAC/metadata.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+uint32_t grabbag__cuesheet_msf_to_frame(uint32_t minutes,
+                                        uint32_t seconds,
+                                        uint32_t frames);
+void grabbag__cuesheet_frame_to_msf(uint32_t frame,
+                                    uint32_t* minutes,
+                                    uint32_t* seconds,
+                                    uint32_t* frames);
+
+FLAC__StreamMetadata* grabbag__cuesheet_parse(FILE* file,
+                                              const char** error_message,
+                                              uint32_t* last_line_read,
+                                              uint32_t sample_rate,
+                                              FLAC__bool is_cdda,
+                                              FLAC__uint64 lead_out_offset);
+
+void grabbag__cuesheet_emit(FILE* file,
+                            const FLAC__StreamMetadata* cuesheet,
+                            const char* file_reference);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif

diff --git a/include/share/grabbag/file.h b/include/share/grabbag/file.h
new file mode 100644
index 0000000..51ec0a8
--- /dev/null
+++ b/include/share/grabbag/file.h

@@ -0,0 +1,68 @@
+/* grabbag - Convenience lib for various routines common to several tools
+ * Copyright (C) 2002-2009  Josh Coalson
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+ */
+
+/* Convenience routines for manipulating files */
+
+/* This .h cannot be included by itself; #include "share/grabbag.h" instead. */
+
+#ifndef GRABAG__FILE_H
+#define GRABAG__FILE_H
+
+/* needed because of off_t */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdio.h>     /* for FILE */
+#include <sys/types.h> /* for off_t */
+#include "FLAC/ordinals.h"
+#include "share/compat.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void grabbag__file_copy_metadata(const char* srcpath, const char* destpath);
+FLAC__off_t grabbag__file_get_filesize(const char* srcpath);
+const char* grabbag__file_get_basename(const char* srcpath);
+
+/* read_only == false means "make file writable by user"
+ * read_only == true means "make file read-only for everyone"
+ */
+FLAC__bool grabbag__file_change_stats(const char* filename,
+                                      FLAC__bool read_only);
+
+/* returns true iff stat() succeeds for both files and they have the same device
+ * and inode. */
+/* on windows, uses GetFileInformationByHandle() to compare */
+FLAC__bool grabbag__file_are_same(const char* f1, const char* f2);
+
+/* attempts to make writable before unlinking */
+FLAC__bool grabbag__file_remove_file(const char* filename);
+
+/* these will forcibly set stdin/stdout to binary mode (for OSes that require
+ * it) */
+FILE* grabbag__file_get_binary_stdin(void);
+FILE* grabbag__file_get_binary_stdout(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif

diff --git a/include/share/grabbag/picture.h b/include/share/grabbag/picture.h
new file mode 100644
index 0000000..12251e0
--- /dev/null
+++ b/include/share/grabbag/picture.h

@@ -0,0 +1,62 @@
+/* grabbag - Convenience lib for various routines common to several tools
+ * Copyright (C) 2006-2009  Josh Coalson
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+/* This .h cannot be included by itself; #include "share/grabbag.h" instead. */
+
+#ifndef GRABBAG__PICTURE_H
+#define GRABBAG__PICTURE_H
+
+#include "FLAC/metadata.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* spec should be of the form
+ * "[TYPE]|MIME_TYPE|[DESCRIPTION]|[WIDTHxHEIGHTxDEPTH[/COLORS]]|FILE", e.g.
+ *   "|image/jpeg|||cover.jpg"
+ *   "4|image/jpeg||300x300x24|backcover.jpg"
+ *   "|image/png|description|300x300x24/71|cover.png"
+ *   "-->|image/gif||300x300x24/71|http://blah.blah.blah/cover.gif"
+ *
+ * empty type means default to FLAC__STREAM_METADATA_PICTURE_TYPE_FRONT_COVER
+ * empty resolution spec means to get from the file (cannot be used with "-->"
+ * linked images); spec and error_message must not be NULL
+ */
+FLAC__StreamMetadata* grabbag__picture_parse_specification(
+    const char* spec,
+    const char** error_message);
+
+typedef struct PictureResolution {
+  uint32_t width, height, depth, colors;
+} PictureResolution;
+
+FLAC__StreamMetadata* grabbag__picture_from_specification(
+    int type,
+    const char* mime_type,
+    const char* description,
+    const PictureResolution* res,
+    const char* filepath,
+    const char** error_message);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif

diff --git a/include/share/grabbag/replaygain.h b/include/share/grabbag/replaygain.h
new file mode 100644
index 0000000..cd648a5
--- /dev/null
+++ b/include/share/grabbag/replaygain.h

@@ -0,0 +1,123 @@
+/* grabbag - Convenience lib for various routines common to several tools
+ * Copyright (C) 2002-2009  Josh Coalson
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+ */
+
+/*
+ * This wraps the replaygain_analysis lib, which is LGPL.  This wrapper
+ * allows analysis of different input resolutions by automatically
+ * scaling the input signal
+ */
+
+/* This .h cannot be included by itself; #include "share/grabbag.h" instead. */
+
+#ifndef GRABBAG__REPLAYGAIN_H
+#define GRABBAG__REPLAYGAIN_H
+
+#include "FLAC/metadata.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern const uint32_t GRABBAG__REPLAYGAIN_MAX_TAG_SPACE_REQUIRED;
+
+extern const FLAC__byte* const
+    GRABBAG__REPLAYGAIN_TAG_REFERENCE_LOUDNESS; /* =
+                                                   "REPLAYGAIN_REFERENCE_LOUDNESS"
+                                                 */
+extern const FLAC__byte* const
+    GRABBAG__REPLAYGAIN_TAG_TITLE_GAIN; /* = "REPLAYGAIN_TRACK_GAIN" */
+extern const FLAC__byte* const
+    GRABBAG__REPLAYGAIN_TAG_TITLE_PEAK; /* = "REPLAYGAIN_TRACK_PEAK" */
+extern const FLAC__byte* const
+    GRABBAG__REPLAYGAIN_TAG_ALBUM_GAIN; /* = "REPLAYGAIN_ALBUM_GAIN" */
+extern const FLAC__byte* const
+    GRABBAG__REPLAYGAIN_TAG_ALBUM_PEAK; /* = "REPLAYGAIN_ALBUM_PEAK" */
+
+FLAC__bool grabbag__replaygain_is_valid_sample_frequency(
+    uint32_t sample_frequency);
+
+FLAC__bool grabbag__replaygain_init(uint32_t sample_frequency);
+
+/* 'bps' must be valid for FLAC, i.e. >=4 and <= 32 */
+FLAC__bool grabbag__replaygain_analyze(const FLAC__int32* const input[],
+                                       FLAC__bool is_stereo,
+                                       uint32_t bps,
+                                       uint32_t samples);
+
+void grabbag__replaygain_get_album(float* gain, float* peak);
+void grabbag__replaygain_get_title(float* gain, float* peak);
+
+/* These three functions return an error string on error, or NULL if successful
+ */
+const char* grabbag__replaygain_analyze_file(const char* filename,
+                                             float* title_gain,
+                                             float* title_peak);
+const char* grabbag__replaygain_store_to_vorbiscomment(
+    FLAC__StreamMetadata* block,
+    float album_gain,
+    float album_peak,
+    float title_gain,
+    float title_peak);
+const char* grabbag__replaygain_store_to_vorbiscomment_reference(
+    FLAC__StreamMetadata* block);
+const char* grabbag__replaygain_store_to_vorbiscomment_album(
+    FLAC__StreamMetadata* block,
+    float album_gain,
+    float album_peak);
+const char* grabbag__replaygain_store_to_vorbiscomment_title(
+    FLAC__StreamMetadata* block,
+    float title_gain,
+    float title_peak);
+const char* grabbag__replaygain_store_to_file(const char* filename,
+                                              float album_gain,
+                                              float album_peak,
+                                              float title_gain,
+                                              float title_peak,
+                                              FLAC__bool preserve_modtime);
+const char* grabbag__replaygain_store_to_file_reference(
+    const char* filename,
+    FLAC__bool preserve_modtime);
+const char* grabbag__replaygain_store_to_file_album(
+    const char* filename,
+    float album_gain,
+    float album_peak,
+    FLAC__bool preserve_modtime);
+const char* grabbag__replaygain_store_to_file_title(
+    const char* filename,
+    float title_gain,
+    float title_peak,
+    FLAC__bool preserve_modtime);
+
+FLAC__bool grabbag__replaygain_load_from_vorbiscomment(
+    const FLAC__StreamMetadata* block,
+    FLAC__bool album_mode,
+    FLAC__bool strict,
+    double* reference,
+    double* gain,
+    double* peak);
+double grabbag__replaygain_compute_scale_factor(double peak,
+                                                double gain,
+                                                double preamp,
+                                                FLAC__bool prevent_clipping);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif

diff --git a/include/share/grabbag/seektable.h b/include/share/grabbag/seektable.h
new file mode 100644
index 0000000..03b755a
--- /dev/null
+++ b/include/share/grabbag/seektable.h

@@ -0,0 +1,45 @@
+/* grabbag - Convenience lib for various routines common to several tools
+ * Copyright (C) 2002-2009  Josh Coalson
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+ */
+
+/* Convenience routines for working with seek tables */
+
+/* This .h cannot be included by itself; #include "share/grabbag.h" instead. */
+
+#ifndef GRABAG__SEEKTABLE_H
+#define GRABAG__SEEKTABLE_H
+
+#include "FLAC/format.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+FLAC__bool grabbag__seektable_convert_specification_to_template(
+    const char* spec,
+    FLAC__bool only_explicit_placeholders,
+    FLAC__uint64 total_samples_to_encode,
+    uint32_t sample_rate,
+    FLAC__StreamMetadata* seektable_template,
+    FLAC__bool* spec_has_real_points);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif

diff --git a/include/share/macros.h b/include/share/macros.h
new file mode 100644
index 0000000..e6a19da
--- /dev/null
+++ b/include/share/macros.h

@@ -0,0 +1,46 @@
+/* libFLAC - Free Lossless Audio Codec library
+ * Copyright (C) 2013-2022  Xiph.Org Foundation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * - Neither the name of the Xiph.org Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <errno.h>
+
+/* FLAC_CHECK_RETURN : Check the return value of the provided function and
+ * print an error message if it fails (ie returns a value < 0).
+ *
+ * Ideally, a library should not print anything, but this macro is only used
+ * for things that are extremely unlikely to fail, like `chown` to a previously
+ * saved `uid`. Wrapped in do/while(0) so it is safe in an unbraced if/else.
+ */
+
+#define FLAC_CHECK_RETURN(x)                             \
+  do {                                                   \
+    if ((x) < 0)                                         \
+      fprintf(stderr, "%s : %s\n", #x, strerror(errno)); \
+  } while (0)

diff --git a/include/share/private.h b/include/share/private.h
index 3a500d3..2b59754 100644
--- a/include/share/private.h
+++ b/include/share/private.h

@@ -1,5 +1,5 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2013-2014  Xiph.org Foundation
+ * Copyright (C) 2013-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -33,9 +33,12 @@
 #define FLAC__SHARE__PRIVATE_H
 
 /*
- * Unpublished debug routines from libFLAC> This should not be used from any
+ * Unpublished debug routines from libFLAC. This should not be used from any
  * client code other than code shipped with the FLAC sources.
  */
+FLAC_API FLAC__bool
+FLAC__stream_encoder_disable_instruction_set(FLAC__StreamEncoder* encoder,
+                                             FLAC__bool value);
 FLAC_API FLAC__bool FLAC__stream_encoder_disable_constant_subframes(FLAC__StreamEncoder *encoder, FLAC__bool value);
 FLAC_API FLAC__bool FLAC__stream_encoder_disable_fixed_subframes(FLAC__StreamEncoder *encoder, FLAC__bool value);
 FLAC_API FLAC__bool FLAC__stream_encoder_disable_verbatim_subframes(FLAC__StreamEncoder *encoder, FLAC__bool value);

diff --git a/include/share/replaygain_analysis.h b/include/share/replaygain_analysis.h
new file mode 100644
index 0000000..bce3b14
--- /dev/null
+++ b/include/share/replaygain_analysis.h

@@ -0,0 +1,65 @@
+/*
+ *  ReplayGainAnalysis - analyzes input samples and gives the recommended dB change
+ *  Copyright (C) 2001 David Robinson and Glen Sawyer
+ *
+ *  This library is free software; you can redistribute it and/or
+ *  modify it under the terms of the GNU Lesser General Public
+ *  License as published by the Free Software Foundation; either
+ *  version 2.1 of the License, or (at your option) any later version.
+ *
+ *  This library is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *  Lesser General Public License for more details.
+ *
+ *  You should have received a copy of the GNU Lesser General Public
+ *  License along with this library; if not, write to the Free Software
+ *  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ *
+ *  concept and filter values by David Robinson (David@Robinson.org)
+ *    -- blame him if you think the idea is flawed
+ *  coding by Glen Sawyer (glensawyer@hotmail.com) 442 N 700 E, Provo, UT 84606 USA
+ *    -- blame him if you think this runs too slowly, or the coding is otherwise
+ *       flawed
+ *  minor cosmetic tweaks to integrate with FLAC by Josh Coalson
+ *
+ *  For an explanation of the concepts and the basic algorithms involved, go to:
+ *    http://www.replaygain.org/
+ */
+
+#ifndef GAIN_ANALYSIS_H
+#define GAIN_ANALYSIS_H
+
+#include <stddef.h>
+
+#define GAIN_NOT_ENOUGH_SAMPLES -24601
+#define GAIN_ANALYSIS_ERROR 0
+#define GAIN_ANALYSIS_OK 1
+
+#define INIT_GAIN_ANALYSIS_ERROR 0
+#define INIT_GAIN_ANALYSIS_OK 1
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef float flac_float_t; /* Type used for filtering */
+
+extern flac_float_t
+    ReplayGainReferenceLoudness; /* in dB SPL, currently == 89.0 */
+
+int InitGainAnalysis(long samplefreq);
+int ValidGainFrequency(long samplefreq);
+int AnalyzeSamples(const flac_float_t* left_samples,
+                   const flac_float_t* right_samples,
+                   size_t num_samples,
+                   int num_channels);
+flac_float_t GetTitleGain(void);
+flac_float_t GetAlbumGain(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* GAIN_ANALYSIS_H */

diff --git a/include/share/replaygain_synthesis.h b/include/share/replaygain_synthesis.h
new file mode 100644
index 0000000..3a2b674
--- /dev/null
+++ b/include/share/replaygain_synthesis.h

@@ -0,0 +1,66 @@
+/* replaygain_synthesis - Routines for applying ReplayGain to a signal
+ * Copyright (C) 2002-2009  Josh Coalson
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301 USA
+ */
+
+#ifndef FLAC__SHARE__REPLAYGAIN_SYNTHESIS_H
+#define FLAC__SHARE__REPLAYGAIN_SYNTHESIS_H
+
+#include <stdlib.h> /* for size_t */
+#include "FLAC/format.h"
+
+#define FLAC_SHARE__MAX_SUPPORTED_CHANNELS FLAC__MAX_CHANNELS
+
+typedef enum {
+  NOISE_SHAPING_NONE = 0,
+  NOISE_SHAPING_LOW = 1,
+  NOISE_SHAPING_MEDIUM = 2,
+  NOISE_SHAPING_HIGH = 3
+} NoiseShaping;
+
+typedef struct {
+  const float* FilterCoeff;
+  FLAC__uint64 Mask;
+  double Add;
+  float Dither;
+  float ErrorHistory[FLAC_SHARE__MAX_SUPPORTED_CHANNELS]
+                    [16]; /* 16th order Noise shaping */
+  float DitherHistory[FLAC_SHARE__MAX_SUPPORTED_CHANNELS][16];
+  int LastRandomNumber[FLAC_SHARE__MAX_SUPPORTED_CHANNELS];
+  unsigned LastHistoryIndex;
+  NoiseShaping ShapingType;
+} DitherContext;
+
+void FLAC__replaygain_synthesis__init_dither_context(DitherContext* dither,
+                                                     int bits,
+                                                     int shapingtype);
+
+/* scale = (float) pow(10., (double)replaygain * 0.05); */
+size_t FLAC__replaygain_synthesis__apply_gain(FLAC__byte* data_out,
+                                              FLAC__bool little_endian_data_out,
+                                              FLAC__bool unsigned_data_out,
+                                              const FLAC__int32* const input[],
+                                              uint32_t wide_samples,
+                                              uint32_t channels,
+                                              const uint32_t source_bps,
+                                              const uint32_t target_bps,
+                                              const double scale,
+                                              const FLAC__bool hard_limit,
+                                              FLAC__bool do_dithering,
+                                              DitherContext* dither_context);
+
+#endif

diff --git a/include/share/safe_str.h b/include/share/safe_str.h
new file mode 100644
index 0000000..7f3ac66
--- /dev/null
+++ b/include/share/safe_str.h

@@ -0,0 +1,73 @@
+/* libFLAC - Free Lossless Audio Codec library
+ * Copyright (C) 2013-2022  Xiph.Org Foundation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * - Neither the name of the Xiph.org Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* Safe string handling functions to replace things like strcpy, strncpy,
+ * strcat, strncat etc.
+ * All of these functions guarantee a correctly NUL terminated string but
+ * the string may be truncated if the destination buffer was too short.
+ */
+
+#ifndef FLAC__SHARE_SAFE_STR_H
+#define FLAC__SHARE_SAFE_STR_H
+
+/* Appends src to dest (dest_size bytes), truncating; always NUL terminates. */
+static inline char* safe_strncat(char* dest,
+                                 const char* src,
+                                 size_t dest_size) {
+  size_t used;
+  if (dest_size < 1) {
+    return dest;
+  }
+  /* strncat(d, s, n) writes up to n chars plus a NUL: keep a byte for it. */
+  used = strlen(dest);
+  if (used < dest_size - 1) {
+    strncat(dest, src, dest_size - used - 1);
+  }
+  dest[dest_size - 1] = 0;
+  return dest;
+}
+
+/* Copies src into dest (dest_size bytes), truncating; NUL ends it if size > 0. */
+static inline char* safe_strncpy(char* dest,
+                                 const char* src,
+                                 size_t dest_size) {
+  if (dest_size >= 1) {
+    const size_t last = dest_size - 1;
+    /* strncpy() leaves dest unterminated on truncation, so finish by hand. */
+    strncpy(dest, src, last);
+    dest[last] = 0;
+  }
+
+  /* Same pointer strncpy() hands back. */
+  return dest;
+}
+
+#endif /* FLAC__SHARE_SAFE_STR_H */

diff --git a/include/share/utf8.h b/include/share/utf8.h
new file mode 100644
index 0000000..7c0645a
--- /dev/null
+++ b/include/share/utf8.h

@@ -0,0 +1,25 @@
+#ifndef SHARE__UTF8_H
+#define SHARE__UTF8_H
+
+/*
+ * Convert a string between UTF-8 and the locale's charset.
+ * Invalid bytes are replaced by '#', and characters that are
+ * not available in the target encoding are replaced by '?'.
+ *
+ * If the locale's charset is not set explicitly then it is
+ * obtained using nl_langinfo(CODESET), where available, the
+ * environment variable CHARSET, or assumed to be US-ASCII.
+ *
+ * Return value of conversion functions:
+ *
+ *  -1 : memory allocation failed
+ *   0 : data was converted exactly
+ *   1 : valid data was converted approximately (using '?')
+ *   2 : input was invalid (but still converted, using '#')
+ *   3 : unknown encoding (but still converted, using '?')
+ */
+
+int utf8_encode(const char* from, char** to);
+int utf8_decode(const char* from, char** to);
+
+#endif

diff --git a/include/share/win_utf8_io.h b/include/share/win_utf8_io.h
index 1d15339..dbe942d 100644
--- a/include/share/win_utf8_io.h
+++ b/include/share/win_utf8_io.h

@@ -1,5 +1,5 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2013-2014  Xiph.Org Foundation
+ * Copyright (C) 2013-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -34,14 +34,18 @@
 #ifndef flac__win_utf8_io_h
 #define flac__win_utf8_io_h
 
+#include <stdarg.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <sys/utime.h>
+#include "FLAC/ordinals.h"
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
-#include <stdio.h>
-#include <sys/stat.h>
-#include <stdarg.h>
-#include <windows.h>
+size_t strlen_utf8(const char* str);
+int win_get_console_width(void);
 
 int get_utf8_argv(int *argc, char ***argv);
 
@@ -49,16 +53,14 @@
 int fprintf_utf8(FILE *stream, const char *format, ...);
 int vfprintf_utf8(FILE *stream, const char *format, va_list argptr);
 
-FILE *fopen_utf8(const char *filename, const char *mode);
-int stat_utf8(const char *path, struct stat *buffer);
-int _stat64_utf8(const char *path, struct __stat64 *buffer);
+FILE* fopen_utf8(const char* filename, const char* mode);
+int stat64_utf8(const char* path, struct __stat64* buffer);
 int chmod_utf8(const char *filename, int pmode);
 int utime_utf8(const char *filename, struct utimbuf *times);
 int unlink_utf8(const char *filename);
 int rename_utf8(const char *oldname, const char *newname);
-size_t strlen_utf8(const char *str);
-int win_get_console_width(void);
-int print_console(FILE *stream, const wchar_t *text, size_t len);
+
+#include <windows.h>
 HANDLE WINAPI CreateFile_utf8(const char *lpFileName, DWORD dwDesiredAccess, DWORD dwShareMode, LPSECURITY_ATTRIBUTES lpSecurityAttributes, DWORD dwCreationDisposition, DWORD dwFlagsAndAttributes, HANDLE hTemplateFile);
 
 #ifdef __cplusplus

diff --git a/src/libFLAC/alloc.c b/src/libFLAC/alloc.c
deleted file mode 100644
index c8c30d3..0000000
--- a/src/libFLAC/alloc.c
+++ /dev/null

@@ -1,59 +0,0 @@
-// Copyright (c) 2011 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include <limits.h>
-#if !defined _MSC_VER
-#include <stdint.h>
-#endif
-#include <stdlib.h>
-#include "share/alloc.h"
-
-void *safe_malloc_(size_t size) {
-  return malloc(size ? size : 1);
-}
-
-void *safe_calloc_(size_t num_items, size_t size) {
-  if (num_items && size)
-    return calloc(num_items, size);
-  return malloc(1);
-}
-
-void *safe_malloc_add_2op_(size_t size1, size_t size2) {
-  size_t size = size1 + size2;
-  if (size < size1)
-    return NULL;
-
-  return safe_malloc_(size);
-}
-
-void *safe_malloc_mul_2op_(size_t size1, size_t size2) {
-  if (!size1 || !size2)
-    return malloc(1);
-
-  if (size1 > SIZE_MAX / size2)
-    return NULL;
-
-  return malloc(size1 * size2);
-}
-
-void *safe_malloc_muladd2_(size_t size1, size_t size2, size_t size3) {
-  size_t size = size2 + size3;
-  if (size < size3)
-    return 0;
-
-  if (!size1 || !size)
-    return malloc(1);
-
-  return safe_malloc_mul_2op_(size1, size2);
-}
-
-void *safe_realloc_mul_2op_(void *ptr, size_t size1, size_t size2) {
-  if (!size1 || !size2)
-    return realloc(ptr, 0);
-
-  if (size1 > SIZE_MAX / size2)
-    return 0;
-
-  return realloc(ptr, size1 * size2);
-}

diff --git a/src/libFLAC/bitmath.c b/src/libFLAC/bitmath.c
index 5b58ca9..7c73cc0 100644
--- a/src/libFLAC/bitmath.c
+++ b/src/libFLAC/bitmath.c

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2001-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -60,50 +60,14 @@
  * silog2(  9) = 5
  * silog2( 10) = 5
  */
-unsigned FLAC__bitmath_silog2(int v)
+uint32_t FLAC__bitmath_silog2(FLAC__int64 v)
 {
-	while(1) {
-		if(v == 0) {
-			return 0;
-		}
-		else if(v > 0) {
-			unsigned l = 0;
-			while(v) {
-				l++;
-				v >>= 1;
-			}
-			return l+1;
-		}
-		else if(v == -1) {
-			return 2;
-		}
-		else {
-			v++;
-			v = -v;
-		}
-	}
-}
+	if(v == 0)
+		return 0;
 
-unsigned FLAC__bitmath_silog2_wide(FLAC__int64 v)
-{
-	while(1) {
-		if(v == 0) {
-			return 0;
-		}
-		else if(v > 0) {
-			unsigned l = 0;
-			while(v) {
-				l++;
-				v >>= 1;
-			}
-			return l+1;
-		}
-		else if(v == -1) {
-			return 2;
-		}
-		else {
-			v++;
-			v = -v;
-		}
-	}
+	if(v == -1)
+		return 2;
+
+	v = (v < 0) ? (-(v+1)) : v;
+	return FLAC__bitmath_ilog2_wide(v)+2;
 }

diff --git a/src/libFLAC/bitreader.c b/src/libFLAC/bitreader.c
index f61229b..9a8a5f0 100644
--- a/src/libFLAC/bitreader.c
+++ b/src/libFLAC/bitreader.c

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -45,18 +45,43 @@
 #include "share/endswap.h"
 
 /* Things should be fastest when this matches the machine word size */
-/* WATCHOUT: if you change this you must also change the following #defines down to FLAC__clz_uint32 below to match */
-/* WATCHOUT: there are a few places where the code will not work unless uint32_t is >= 32 bits wide */
+/* WATCHOUT: if you change this you must also change the following #defines down to COUNT_ZERO_MSBS2 below to match */
+/* WATCHOUT: there are a few places where the code will not work unless brword is >= 32 bits wide */
 /*           also, some sections currently only have fast versions for 4 or 8 bytes per word */
-#define FLAC__BYTES_PER_WORD 4		/* sizeof uint32_t */
-#define FLAC__BITS_PER_WORD (8 * FLAC__BYTES_PER_WORD)
+
+#if (ENABLE_64_BIT_WORDS == 0)
+
+typedef FLAC__uint32 brword;
+#define FLAC__BYTES_PER_WORD 4		/* sizeof brword */
+#define FLAC__BITS_PER_WORD 32
 #define FLAC__WORD_ALL_ONES ((FLAC__uint32)0xffffffff)
-/* SWAP_BE_WORD_TO_HOST swaps bytes in a uint32_t (which is always big-endian) if necessary to match host byte order */
+/* SWAP_BE_WORD_TO_HOST swaps bytes in a brword (which is always big-endian) if necessary to match host byte order */
 #if WORDS_BIGENDIAN
 #define SWAP_BE_WORD_TO_HOST(x) (x)
 #else
 #define SWAP_BE_WORD_TO_HOST(x) ENDSWAP_32(x)
 #endif
+/* counts the # of zero MSBs in a word */
+#define COUNT_ZERO_MSBS(word) FLAC__clz_uint32(word)
+#define COUNT_ZERO_MSBS2(word) FLAC__clz2_uint32(word)
+
+#else
+
+typedef FLAC__uint64 brword;
+#define FLAC__BYTES_PER_WORD 8		/* sizeof brword */
+#define FLAC__BITS_PER_WORD 64
+#define FLAC__WORD_ALL_ONES ((FLAC__uint64)FLAC__U64L(0xffffffffffffffff))
+/* SWAP_BE_WORD_TO_HOST swaps bytes in a brword (which is always big-endian) if necessary to match host byte order */
+#if WORDS_BIGENDIAN
+#define SWAP_BE_WORD_TO_HOST(x) (x)
+#else
+#define SWAP_BE_WORD_TO_HOST(x) ENDSWAP_64(x)
+#endif
+/* counts the # of zero MSBs in a word */
+#define COUNT_ZERO_MSBS(word) FLAC__clz_uint64(word)
+#define COUNT_ZERO_MSBS2(word) FLAC__clz2_uint64(word)
+
+#endif
 
 /*
  * This should be at least twice as large as the largest number of words
@@ -72,60 +97,79 @@
  * also depends on the CPU cache size and other factors; some twiddling
  * may be necessary to squeeze out the best performance.
  */
-static const unsigned FLAC__BITREADER_DEFAULT_CAPACITY = 65536u / FLAC__BITS_PER_WORD; /* in words */
+static const uint32_t FLAC__BITREADER_DEFAULT_CAPACITY = 65536u / FLAC__BITS_PER_WORD; /* in words */
 
 struct FLAC__BitReader {
 	/* any partially-consumed word at the head will stay right-justified as bits are consumed from the left */
 	/* any incomplete word at the tail will be left-justified, and bytes from the read callback are added on the right */
-	uint32_t *buffer;
-	unsigned capacity; /* in words */
-	unsigned words; /* # of completed words in buffer */
-	unsigned bytes; /* # of bytes in incomplete word at buffer[words] */
-	unsigned consumed_words; /* #words ... */
-	unsigned consumed_bits; /* ... + (#bits of head word) already consumed from the front of buffer */
-	unsigned read_crc16; /* the running frame CRC */
-	unsigned crc16_align; /* the number of bits in the current consumed word that should not be CRC'd */
+	brword *buffer;
+	uint32_t capacity; /* in words */
+	uint32_t words; /* # of completed words in buffer */
+	uint32_t bytes; /* # of bytes in incomplete word at buffer[words] */
+	uint32_t consumed_words; /* #words ... */
+	uint32_t consumed_bits; /* ... + (#bits of head word) already consumed from the front of buffer */
+	uint32_t read_crc16; /* the running frame CRC */
+	uint32_t crc16_offset; /* the number of words in the current buffer that should not be CRC'd */
+	uint32_t crc16_align; /* the number of bits in the current consumed word that should not be CRC'd */
+	FLAC__bool read_limit_set; /* whether reads are limited */
+	uint32_t read_limit; /* the remaining size of what can be read */
+	uint32_t last_seen_framesync; /* the location of the last seen framesync, if it is in the buffer, in bytes from front of buffer; (uint32_t)-1 if none */
 	FLAC__BitReaderReadCallback read_callback;
 	void *client_data;
 };
 
-static inline void crc16_update_word_(FLAC__BitReader *br, uint32_t word)
+static inline void crc16_update_word_(FLAC__BitReader *br, brword word)
 {
-	register unsigned crc = br->read_crc16;
-#if FLAC__BYTES_PER_WORD == 4
-	switch(br->crc16_align) {
-		case  0: crc = FLAC__CRC16_UPDATE((unsigned)(word >> 24), crc);
-		case  8: crc = FLAC__CRC16_UPDATE((unsigned)((word >> 16) & 0xff), crc);
-		case 16: crc = FLAC__CRC16_UPDATE((unsigned)((word >> 8) & 0xff), crc);
-		case 24: br->read_crc16 = FLAC__CRC16_UPDATE((unsigned)(word & 0xff), crc);
+	register uint32_t crc = br->read_crc16;
+
+	for ( ; br->crc16_align < FLAC__BITS_PER_WORD ; br->crc16_align += 8) {
+		uint32_t shift = FLAC__BITS_PER_WORD - 8 - br->crc16_align ;
+		crc = FLAC__CRC16_UPDATE ((uint32_t) (shift < FLAC__BITS_PER_WORD ? (word >> shift) & 0xff : 0), crc);
 	}
-#elif FLAC__BYTES_PER_WORD == 8
-	switch(br->crc16_align) {
-		case  0: crc = FLAC__CRC16_UPDATE((unsigned)(word >> 56), crc);
-		case  8: crc = FLAC__CRC16_UPDATE((unsigned)((word >> 48) & 0xff), crc);
-		case 16: crc = FLAC__CRC16_UPDATE((unsigned)((word >> 40) & 0xff), crc);
-		case 24: crc = FLAC__CRC16_UPDATE((unsigned)((word >> 32) & 0xff), crc);
-		case 32: crc = FLAC__CRC16_UPDATE((unsigned)((word >> 24) & 0xff), crc);
-		case 40: crc = FLAC__CRC16_UPDATE((unsigned)((word >> 16) & 0xff), crc);
-		case 48: crc = FLAC__CRC16_UPDATE((unsigned)((word >> 8) & 0xff), crc);
-		case 56: br->read_crc16 = FLAC__CRC16_UPDATE((unsigned)(word & 0xff), crc);
-	}
-#else
-	for( ; br->crc16_align < FLAC__BITS_PER_WORD; br->crc16_align += 8)
-		crc = FLAC__CRC16_UPDATE((unsigned)((word >> (FLAC__BITS_PER_WORD-8-br->crc16_align)) & 0xff), crc);
+
 	br->read_crc16 = crc;
-#endif
 	br->crc16_align = 0;
 }
 
+static inline void crc16_update_block_(FLAC__BitReader *br)
+{
+	if(br->consumed_words > br->crc16_offset && br->crc16_align)
+		crc16_update_word_(br, br->buffer[br->crc16_offset++]);
+
+	/* Prevent OOB read due to wrap-around. */
+	if (br->consumed_words > br->crc16_offset) {
+#if FLAC__BYTES_PER_WORD == 4
+		br->read_crc16 = FLAC__crc16_update_words32(br->buffer + br->crc16_offset, br->consumed_words - br->crc16_offset, br->read_crc16);
+#elif FLAC__BYTES_PER_WORD == 8
+		br->read_crc16 = FLAC__crc16_update_words64(br->buffer + br->crc16_offset, br->consumed_words - br->crc16_offset, br->read_crc16);
+#else
+		unsigned i;
+
+		for (i = br->crc16_offset; i < br->consumed_words; i++)
+			crc16_update_word_(br, br->buffer[i]);
+#endif
+	}
+
+	br->crc16_offset = 0;
+}
+
 static FLAC__bool bitreader_read_from_client_(FLAC__BitReader *br)
 {
-	unsigned start, end;
+	uint32_t start, end;
 	size_t bytes;
 	FLAC__byte *target;
+#if WORDS_BIGENDIAN
+#else
+	brword preswap_backup;
+#endif
+
+	/* invalidate last seen framesync */
+	br->last_seen_framesync = -1;
 
 	/* first shift the unconsumed buffer data toward the front as much as possible */
 	if(br->consumed_words > 0) {
+		crc16_update_block_(br); /* CRC consumed words */
+
 		start = br->consumed_words;
 		end = br->words + (br->bytes? 1:0);
 		memmove(br->buffer, br->buffer+start, FLAC__BYTES_PER_WORD * (end - start));
@@ -142,9 +186,9 @@
 		return false; /* no space left, buffer is too small; see note for FLAC__BITREADER_DEFAULT_CAPACITY  */
 	target = ((FLAC__byte*)(br->buffer+br->words)) + br->bytes;
 
-	/* before reading, if the existing reader looks like this (say uint32_t is 32 bits wide)
+	/* before reading, if the existing reader looks like this (say brword is 32 bits wide)
 	 *   bitstream :  11 22 33 44 55            br->words=1 br->bytes=1 (partial tail word is left-justified)
-	 *   buffer[BE]:  11 22 33 44 55 ?? ?? ??   (shown layed out as bytes sequentially in memory)
+	 *   buffer[BE]:  11 22 33 44 55 ?? ?? ??   (shown laid out as bytes sequentially in memory)
 	 *   buffer[LE]:  44 33 22 11 ?? ?? ?? 55   (?? being don't-care)
 	 *                               ^^-------target, bytes=3
 	 * on LE machines, have to byteswap the odd tail word so nothing is
@@ -152,6 +196,7 @@
 	 */
 #if WORDS_BIGENDIAN
 #else
+	preswap_backup = br->buffer[br->words];
 	if(br->bytes)
 		br->buffer[br->words] = SWAP_BE_WORD_TO_HOST(br->buffer[br->words]);
 #endif
@@ -164,8 +209,16 @@
 	 */
 
 	/* read in the data; note that the callback may return a smaller number of bytes */
-	if(!br->read_callback(target, &bytes, br->client_data))
+	if(!br->read_callback(target, &bytes, br->client_data)){
+		/* Despite the read callback failing, the data in the target
+		 * might be used later, when the buffer is rewound. Therefore
+		 * we revert the swap that was just done */
+#if WORDS_BIGENDIAN
+#else
+		br->buffer[br->words] = preswap_backup;
+#endif
 		return false;
+	}
 
 	/* after reading bytes 66 77 88 99 AA BB CC DD EE FF from the client:
 	 *   bitstream :  11 22 33 44 55 66 77 88 99 AA BB CC DD EE FF
@@ -175,7 +228,7 @@
 	 */
 #if WORDS_BIGENDIAN
 #else
-	end = (br->words*FLAC__BYTES_PER_WORD + br->bytes + bytes + (FLAC__BYTES_PER_WORD-1)) / FLAC__BYTES_PER_WORD;
+	end = (br->words*FLAC__BYTES_PER_WORD + br->bytes + (uint32_t)bytes + (FLAC__BYTES_PER_WORD-1)) / FLAC__BYTES_PER_WORD;
 	for(start = br->words; start < end; start++)
 		br->buffer[start] = SWAP_BE_WORD_TO_HOST(br->buffer[start]);
 #endif
@@ -186,7 +239,7 @@
 	 *   buffer[LE]:  44 33 22 11 88 77 66 55 CC BB AA 99 ?? FF EE DD
 	 * finally we'll update the reader values:
 	 */
-	end = br->words*FLAC__BYTES_PER_WORD + br->bytes + bytes;
+	end = br->words*FLAC__BYTES_PER_WORD + br->bytes + (uint32_t)bytes;
 	br->words = end / FLAC__BYTES_PER_WORD;
 	br->bytes = end % FLAC__BYTES_PER_WORD;
 
@@ -236,11 +289,14 @@
 	br->words = br->bytes = 0;
 	br->consumed_words = br->consumed_bits = 0;
 	br->capacity = FLAC__BITREADER_DEFAULT_CAPACITY;
-	br->buffer = malloc(sizeof(uint32_t) * br->capacity);
+	br->buffer = malloc(sizeof(brword) * br->capacity);
 	if(br->buffer == 0)
 		return false;
 	br->read_callback = rcb;
 	br->client_data = cd;
+	br->read_limit_set = false;
+	br->read_limit = -1;
+	br->last_seen_framesync = -1;
 
 	return true;
 }
@@ -257,18 +313,42 @@
 	br->consumed_words = br->consumed_bits = 0;
 	br->read_callback = 0;
 	br->client_data = 0;
+	br->read_limit_set = false;
+	br->read_limit = -1;
+	br->last_seen_framesync = -1;
 }
 
 FLAC__bool FLAC__bitreader_clear(FLAC__BitReader *br)
 {
 	br->words = br->bytes = 0;
 	br->consumed_words = br->consumed_bits = 0;
+	br->read_limit_set = false;
+	br->read_limit = -1;
+	br->last_seen_framesync = -1;
 	return true;
 }
 
+void FLAC__bitreader_set_framesync_location(FLAC__BitReader *br)
+{
+	br->last_seen_framesync = br->consumed_words * FLAC__BYTES_PER_WORD + br->consumed_bits / 8;
+}
+
+FLAC__bool FLAC__bitreader_rewind_to_after_last_seen_framesync(FLAC__BitReader *br)
+{
+	if(br->last_seen_framesync == (uint32_t)-1) {
+		br->consumed_words = br->consumed_bits = 0;
+		return false;
+	}
+	else {
+		br->consumed_words = (br->last_seen_framesync + 1) / FLAC__BYTES_PER_WORD;
+		br->consumed_bits  = ((br->last_seen_framesync + 1) % FLAC__BYTES_PER_WORD) * 8;
+		return true;
+	}
+}
+
 void FLAC__bitreader_dump(const FLAC__BitReader *br, FILE *out)
 {
-	unsigned i, j;
+	uint32_t i, j;
 	if(br == 0) {
 		fprintf(out, "bitreader is NULL\n");
 	}
@@ -281,7 +361,7 @@
 				if(i < br->consumed_words || (i == br->consumed_words && j < br->consumed_bits))
 					fprintf(out, ".");
 				else
-					fprintf(out, "%01u", br->buffer[i] & (1 << (FLAC__BITS_PER_WORD-j-1)) ? 1:0);
+					fprintf(out, "%01d", br->buffer[i] & ((brword)1 << (FLAC__BITS_PER_WORD-j-1)) ? 1:0);
 			fprintf(out, "\n");
 		}
 		if(br->bytes > 0) {
@@ -290,7 +370,7 @@
 				if(i < br->consumed_words || (i == br->consumed_words && j < br->consumed_bits))
 					fprintf(out, ".");
 				else
-					fprintf(out, "%01u", br->buffer[i] & (1 << (br->bytes*8-j-1)) ? 1:0);
+					fprintf(out, "%01d", br->buffer[i] & ((brword)1 << (br->bytes*8-j-1)) ? 1:0);
 			fprintf(out, "\n");
 		}
 	}
@@ -302,7 +382,8 @@
 	FLAC__ASSERT(0 != br->buffer);
 	FLAC__ASSERT((br->consumed_bits & 7) == 0);
 
-	br->read_crc16 = (unsigned)seed;
+	br->read_crc16 = (uint32_t)seed;
+	br->crc16_offset = br->consumed_words;
 	br->crc16_align = br->consumed_bits;
 }
 
@@ -310,14 +391,18 @@
 {
 	FLAC__ASSERT(0 != br);
 	FLAC__ASSERT(0 != br->buffer);
+
+	/* CRC consumed words up to here */
+	crc16_update_block_(br);
+
 	FLAC__ASSERT((br->consumed_bits & 7) == 0);
 	FLAC__ASSERT(br->crc16_align <= br->consumed_bits);
 
 	/* CRC any tail bytes in a partially-consumed word */
 	if(br->consumed_bits) {
-		const uint32_t tail = br->buffer[br->consumed_words];
+		const brword tail = br->buffer[br->consumed_words];
 		for( ; br->crc16_align < br->consumed_bits; br->crc16_align += 8)
-			br->read_crc16 = FLAC__CRC16_UPDATE((unsigned)((tail >> (FLAC__BITS_PER_WORD-8-br->crc16_align)) & 0xff), br->read_crc16);
+			br->read_crc16 = FLAC__CRC16_UPDATE((uint32_t)((tail >> (FLAC__BITS_PER_WORD-8-br->crc16_align)) & 0xff), br->read_crc16);
 	}
 	return br->read_crc16;
 }
@@ -327,17 +412,39 @@
 	return ((br->consumed_bits & 7) == 0);
 }
 
-inline unsigned FLAC__bitreader_bits_left_for_byte_alignment(const FLAC__BitReader *br)
+inline uint32_t FLAC__bitreader_bits_left_for_byte_alignment(const FLAC__BitReader *br)
 {
 	return 8 - (br->consumed_bits & 7);
 }
 
-inline unsigned FLAC__bitreader_get_input_bits_unconsumed(const FLAC__BitReader *br)
+inline uint32_t FLAC__bitreader_get_input_bits_unconsumed(const FLAC__BitReader *br)
 {
 	return (br->words-br->consumed_words)*FLAC__BITS_PER_WORD + br->bytes*8 - br->consumed_bits;
 }
 
-FLAC__bool FLAC__bitreader_read_raw_uint32(FLAC__BitReader *br, FLAC__uint32 *val, unsigned bits)
+void FLAC__bitreader_set_limit(FLAC__BitReader *br, uint32_t limit)
+{
+	br->read_limit = limit;
+	br->read_limit_set = true;
+}
+
+void FLAC__bitreader_remove_limit(FLAC__BitReader *br)
+{
+	br->read_limit_set = false;
+	br->read_limit = -1;
+}
+
+uint32_t FLAC__bitreader_limit_remaining(FLAC__BitReader *br)
+{
+	FLAC__ASSERT(br->read_limit_set);
+	return br->read_limit;
+}
+void FLAC__bitreader_limit_invalidate(FLAC__BitReader *br)
+{
+	br->read_limit = -1;
+}
+
+FLAC__bool FLAC__bitreader_read_raw_uint32(FLAC__BitReader *br, FLAC__uint32 *val, uint32_t bits)
 {
 	FLAC__ASSERT(0 != br);
 	FLAC__ASSERT(0 != br->buffer);
@@ -354,6 +461,15 @@
 		return true;
 	}
 
+	if(br->read_limit_set && br->read_limit < (uint32_t)-1){
+		if(br->read_limit < bits) {
+			br->read_limit = -1;
+			return false;
+		}
+		else
+			br->read_limit -= bits;
+	}
+
 	while((br->words-br->consumed_words)*FLAC__BITS_PER_WORD + br->bytes*8 - br->consumed_bits < bits) {
 		if(!bitreader_read_from_client_(br))
 			return false;
@@ -362,35 +478,37 @@
 		/* OPT: taking out the consumed_bits==0 "else" case below might make things faster if less code allows the compiler to inline this function */
 		if(br->consumed_bits) {
 			/* this also works when consumed_bits==0, it's just a little slower than necessary for that case */
-			const unsigned n = FLAC__BITS_PER_WORD - br->consumed_bits;
-			const uint32_t word = br->buffer[br->consumed_words];
+			const uint32_t n = FLAC__BITS_PER_WORD - br->consumed_bits;
+			const brword word = br->buffer[br->consumed_words];
+			const brword mask = br->consumed_bits < FLAC__BITS_PER_WORD ? FLAC__WORD_ALL_ONES >> br->consumed_bits : 0;
 			if(bits < n) {
-				*val = (word & (FLAC__WORD_ALL_ONES >> br->consumed_bits)) >> (n-bits);
+				uint32_t shift = n - bits;
+				*val = shift < FLAC__BITS_PER_WORD ? (FLAC__uint32)((word & mask) >> shift) : 0; /* The result has <= 32 non-zero bits */
 				br->consumed_bits += bits;
 				return true;
 			}
-			*val = word & (FLAC__WORD_ALL_ONES >> br->consumed_bits);
+			/* (FLAC__BITS_PER_WORD - br->consumed_bits <= bits) ==> (FLAC__WORD_ALL_ONES >> br->consumed_bits) has no more than 'bits' non-zero bits */
+			*val = (FLAC__uint32)(word & mask);
 			bits -= n;
-			crc16_update_word_(br, word);
 			br->consumed_words++;
 			br->consumed_bits = 0;
 			if(bits) { /* if there are still bits left to read, there have to be less than 32 so they will all be in the next word */
-				*val <<= bits;
-				*val |= (br->buffer[br->consumed_words] >> (FLAC__BITS_PER_WORD-bits));
+				uint32_t shift = FLAC__BITS_PER_WORD - bits;
+				*val = bits < 32 ? *val << bits : 0;
+				*val |= shift < FLAC__BITS_PER_WORD ? (FLAC__uint32)(br->buffer[br->consumed_words] >> shift) : 0;
 				br->consumed_bits = bits;
 			}
 			return true;
 		}
-		else {
-			const uint32_t word = br->buffer[br->consumed_words];
+		else { /* br->consumed_bits == 0 */
+			const brword word = br->buffer[br->consumed_words];
 			if(bits < FLAC__BITS_PER_WORD) {
-				*val = word >> (FLAC__BITS_PER_WORD-bits);
+				*val = (FLAC__uint32)(word >> (FLAC__BITS_PER_WORD-bits));
 				br->consumed_bits = bits;
 				return true;
 			}
-			/* at this point 'bits' must be == FLAC__BITS_PER_WORD; because of previous assertions, it can't be larger */
-			*val = word;
-			crc16_update_word_(br, word);
+			/* at this point bits == FLAC__BITS_PER_WORD == 32; because of previous assertions, it can't be larger */
+			*val = (FLAC__uint32)word;
 			br->consumed_words++;
 			return true;
 		}
@@ -404,30 +522,32 @@
 		if(br->consumed_bits) {
 			/* this also works when consumed_bits==0, it's just a little slower than necessary for that case */
 			FLAC__ASSERT(br->consumed_bits + bits <= br->bytes*8);
-			*val = (br->buffer[br->consumed_words] & (FLAC__WORD_ALL_ONES >> br->consumed_bits)) >> (FLAC__BITS_PER_WORD-br->consumed_bits-bits);
+			*val = (FLAC__uint32)((br->buffer[br->consumed_words] & (FLAC__WORD_ALL_ONES >> br->consumed_bits)) >> (FLAC__BITS_PER_WORD-br->consumed_bits-bits));
 			br->consumed_bits += bits;
 			return true;
 		}
 		else {
-			*val = br->buffer[br->consumed_words] >> (FLAC__BITS_PER_WORD-bits);
+			*val = (FLAC__uint32)(br->buffer[br->consumed_words] >> (FLAC__BITS_PER_WORD-bits));
 			br->consumed_bits += bits;
 			return true;
 		}
 	}
 }
 
-FLAC__bool FLAC__bitreader_read_raw_int32(FLAC__BitReader *br, FLAC__int32 *val, unsigned bits)
+FLAC__bool FLAC__bitreader_read_raw_int32(FLAC__BitReader *br, FLAC__int32 *val, uint32_t bits)
 {
+	FLAC__uint32 uval, mask;
 	/* OPT: inline raw uint32 code here, or make into a macro if possible in the .h file */
-	if(!FLAC__bitreader_read_raw_uint32(br, (FLAC__uint32*)val, bits))
+	if (bits < 1 || ! FLAC__bitreader_read_raw_uint32(br, &uval, bits))
 		return false;
-	/* sign-extend: */
-	*val <<= (32-bits);
-	*val >>= (32-bits);
+	/* sign-extend uval into *val, treating uval as a value that is 'bits' bits wide. */
+	/* From: https://graphics.stanford.edu/~seander/bithacks.html#FixedSignExtend */
+	mask = bits >= 33 ? 0 : 1lu << (bits - 1);
+	*val = (uval ^ mask) - mask;
 	return true;
 }
 
-FLAC__bool FLAC__bitreader_read_raw_uint64(FLAC__BitReader *br, FLAC__uint64 *val, unsigned bits)
+FLAC__bool FLAC__bitreader_read_raw_uint64(FLAC__BitReader *br, FLAC__uint64 *val, uint32_t bits)
 {
 	FLAC__uint32 hi, lo;
 
@@ -448,6 +568,19 @@
 	return true;
 }
 
+FLAC__bool FLAC__bitreader_read_raw_int64(FLAC__BitReader *br, FLAC__int64 *val, uint32_t bits)
+{
+	FLAC__uint64 uval, mask;
+	/* OPT: inline raw uint64 code here, or make into a macro if possible in the .h file */
+	if (bits < 1 || ! FLAC__bitreader_read_raw_uint64(br, &uval, bits))
+		return false;
+	/* sign-extend uval into *val, treating uval as a value that is 'bits' bits wide. */
+	/* From: https://graphics.stanford.edu/~seander/bithacks.html#FixedSignExtend */
+	mask = bits >= 65 ? 0 : 1llu << (bits - 1);
+	*val = (uval ^ mask) - mask;
+	return true;
+}
+
 inline FLAC__bool FLAC__bitreader_read_uint32_little_endian(FLAC__BitReader *br, FLAC__uint32 *val)
 {
 	FLAC__uint32 x8, x32 = 0;
@@ -473,7 +606,7 @@
 	return true;
 }
 
-FLAC__bool FLAC__bitreader_skip_bits_no_crc(FLAC__BitReader *br, unsigned bits)
+FLAC__bool FLAC__bitreader_skip_bits_no_crc(FLAC__BitReader *br, uint32_t bits)
 {
 	/*
 	 * OPT: a faster implementation is possible but probably not that useful
@@ -483,8 +616,8 @@
 	FLAC__ASSERT(0 != br->buffer);
 
 	if(bits > 0) {
-		const unsigned n = br->consumed_bits & 7;
-		unsigned m;
+		const uint32_t n = br->consumed_bits & 7;
+		uint32_t m;
 		FLAC__uint32 x;
 
 		if(n != 0) {
@@ -508,7 +641,7 @@
 	return true;
 }
 
-FLAC__bool FLAC__bitreader_skip_byte_block_aligned_no_crc(FLAC__BitReader *br, unsigned nvals)
+FLAC__bool FLAC__bitreader_skip_byte_block_aligned_no_crc(FLAC__BitReader *br, uint32_t nvals)
 {
 	FLAC__uint32 x;
 
@@ -516,6 +649,13 @@
 	FLAC__ASSERT(0 != br->buffer);
 	FLAC__ASSERT(FLAC__bitreader_is_consumed_byte_aligned(br));
 
+	if(br->read_limit_set && br->read_limit < (uint32_t)-1){
+		if(br->read_limit < nvals*8){
+			br->read_limit = -1;
+			return false;
+		}
+	}
+
 	/* step 1: skip over partial head word to get word aligned */
 	while(nvals && br->consumed_bits) { /* i.e. run until we read 'nvals' bytes or we hit the end of the head word */
 		if(!FLAC__bitreader_read_raw_uint32(br, &x, 8))
@@ -524,11 +664,14 @@
 	}
 	if(0 == nvals)
 		return true;
+
 	/* step 2: skip whole words in chunks */
 	while(nvals >= FLAC__BYTES_PER_WORD) {
 		if(br->consumed_words < br->words) {
 			br->consumed_words++;
 			nvals -= FLAC__BYTES_PER_WORD;
+			if(br->read_limit_set)
+				br->read_limit -= FLAC__BITS_PER_WORD;
 		}
 		else if(!bitreader_read_from_client_(br))
 			return false;
@@ -543,7 +686,7 @@
 	return true;
 }
 
-FLAC__bool FLAC__bitreader_read_byte_block_aligned_no_crc(FLAC__BitReader *br, FLAC__byte *val, unsigned nvals)
+FLAC__bool FLAC__bitreader_read_byte_block_aligned_no_crc(FLAC__BitReader *br, FLAC__byte *val, uint32_t nvals)
 {
 	FLAC__uint32 x;
 
@@ -551,6 +694,13 @@
 	FLAC__ASSERT(0 != br->buffer);
 	FLAC__ASSERT(FLAC__bitreader_is_consumed_byte_aligned(br));
 
+	if(br->read_limit_set && br->read_limit < (uint32_t)-1){
+		if(br->read_limit < nvals*8){
+			br->read_limit = -1;
+			return false;
+		}
+	}
+
 	/* step 1: read from partial head word to get word aligned */
 	while(nvals && br->consumed_bits) { /* i.e. run until we read 'nvals' bytes or we hit the end of the head word */
 		if(!FLAC__bitreader_read_raw_uint32(br, &x, 8))
@@ -563,7 +713,7 @@
 	/* step 2: read whole words in chunks */
 	while(nvals >= FLAC__BYTES_PER_WORD) {
 		if(br->consumed_words < br->words) {
-			const uint32_t word = br->buffer[br->consumed_words++];
+			const brword word = br->buffer[br->consumed_words++];
 #if FLAC__BYTES_PER_WORD == 4
 			val[0] = (FLAC__byte)(word >> 24);
 			val[1] = (FLAC__byte)(word >> 16);
@@ -584,6 +734,8 @@
 #endif
 			val += FLAC__BYTES_PER_WORD;
 			nvals -= FLAC__BYTES_PER_WORD;
+			if(br->read_limit_set)
+				br->read_limit -= FLAC__BITS_PER_WORD;
 		}
 		else if(!bitreader_read_from_client_(br))
 			return false;
@@ -599,10 +751,10 @@
 	return true;
 }
 
-FLAC__bool FLAC__bitreader_read_unary_unsigned(FLAC__BitReader *br, unsigned *val)
+FLAC__bool FLAC__bitreader_read_unary_unsigned(FLAC__BitReader *br, uint32_t *val)
 #if 0 /* slow but readable version */
 {
-	unsigned bit;
+	uint32_t bit;
 
 	FLAC__ASSERT(0 != br);
 	FLAC__ASSERT(0 != br->buffer);
@@ -620,7 +772,7 @@
 }
 #else
 {
-	unsigned i;
+	uint32_t i;
 
 	FLAC__ASSERT(0 != br);
 	FLAC__ASSERT(0 != br->buffer);
@@ -628,14 +780,13 @@
 	*val = 0;
 	while(1) {
 		while(br->consumed_words < br->words) { /* if we've not consumed up to a partial tail word... */
-			uint32_t b = br->buffer[br->consumed_words] << br->consumed_bits;
+			brword b = br->consumed_bits < FLAC__BITS_PER_WORD ? br->buffer[br->consumed_words] << br->consumed_bits : 0;
 			if(b) {
-				i = FLAC__clz_uint32(b);
+				i = COUNT_ZERO_MSBS(b);
 				*val += i;
 				i++;
 				br->consumed_bits += i;
 				if(br->consumed_bits >= FLAC__BITS_PER_WORD) { /* faster way of testing if(br->consumed_bits == FLAC__BITS_PER_WORD) */
-					crc16_update_word_(br, br->buffer[br->consumed_words]);
 					br->consumed_words++;
 					br->consumed_bits = 0;
 				}
@@ -643,7 +794,6 @@
 			}
 			else {
 				*val += FLAC__BITS_PER_WORD - br->consumed_bits;
-				crc16_update_word_(br, br->buffer[br->consumed_words]);
 				br->consumed_words++;
 				br->consumed_bits = 0;
 				/* didn't find stop bit yet, have to keep going... */
@@ -657,10 +807,10 @@
 		 * be zero.
 		 */
 		if(br->bytes*8 > br->consumed_bits) {
-			const unsigned end = br->bytes * 8;
-			uint32_t b = (br->buffer[br->consumed_words] & (FLAC__WORD_ALL_ONES << (FLAC__BITS_PER_WORD-end))) << br->consumed_bits;
+			const uint32_t end = br->bytes * 8;
+			brword b = (br->buffer[br->consumed_words] & (FLAC__WORD_ALL_ONES << (FLAC__BITS_PER_WORD-end))) << br->consumed_bits;
 			if(b) {
-				i = FLAC__clz_uint32(b);
+				i = COUNT_ZERO_MSBS(b);
 				*val += i;
 				i++;
 				br->consumed_bits += i;
@@ -680,10 +830,11 @@
 }
 #endif
 
-FLAC__bool FLAC__bitreader_read_rice_signed(FLAC__BitReader *br, int *val, unsigned parameter)
+#if 0 /* unused */
+FLAC__bool FLAC__bitreader_read_rice_signed(FLAC__BitReader *br, int *val, uint32_t parameter)
 {
 	FLAC__uint32 lsbs = 0, msbs = 0;
-	unsigned uval;
+	uint32_t uval;
 
 	FLAC__ASSERT(0 != br);
 	FLAC__ASSERT(0 != br->buffer);
@@ -706,16 +857,17 @@
 
 	return true;
 }
+#endif
 
 /* this is by far the most heavily used reader call.  it ain't pretty but it's fast */
-FLAC__bool FLAC__bitreader_read_rice_signed_block(FLAC__BitReader *br, int vals[], unsigned nvals, unsigned parameter)
+FLAC__bool FLAC__bitreader_read_rice_signed_block(FLAC__BitReader *br, int vals[], uint32_t nvals, uint32_t parameter)
 {
 	/* try and get br->consumed_words and br->consumed_bits into register;
 	 * must remember to flush them back to *br before calling other
 	 * bitreader functions that use them, and before returning */
-	unsigned cwords, words, lsbs, msbs, x, y;
-	unsigned ucbits; /* keep track of the number of unconsumed bits in word */
-	uint32_t b;
+	uint32_t cwords, words, lsbs, msbs, x, y, limit;
+	uint32_t ucbits; /* keep track of the number of unconsumed bits in word */
+	brword b;
 	int *val, *end;
 
 	FLAC__ASSERT(0 != br);
@@ -725,6 +877,8 @@
 	FLAC__ASSERT(parameter < 32);
 	/* the above two asserts also guarantee that the binary part never straddles more than 2 words, so we don't have to loop to read it */
 
+	limit = UINT32_MAX >> parameter; /* Maximal msbs that can occur with residual bounded to int32_t */
+
 	val = vals;
 	end = vals + nvals;
 
@@ -733,7 +887,8 @@
 			/* read the unary MSBs and end bit */
 			if(!FLAC__bitreader_read_unary_unsigned(br, &msbs))
 				return false;
-
+			/* Checking limit here would be overzealous: coding UINT32_MAX
+			 * with parameter == 0 would take 4GiB */
 			*val++ = (int)(msbs >> 1) ^ -(int)(msbs & 1);
 		}
 
@@ -756,16 +911,16 @@
 
 	while(val < end) {
 		/* read the unary MSBs and end bit */
-		x = y = FLAC__clz2_uint32(b);
+		x = y = COUNT_ZERO_MSBS2(b);
 		if(x == FLAC__BITS_PER_WORD) {
 			x = ucbits;
 			do {
 				/* didn't find stop bit yet, have to keep going... */
-				crc16_update_word_(br, br->buffer[cwords++]);
+				cwords++;
 				if (cwords >= words)
 					goto incomplete_msbs;
 				b = br->buffer[cwords];
-				y = FLAC__clz2_uint32(b);
+				y = COUNT_ZERO_MSBS2(b);
 				x += y;
 			} while(y == FLAC__BITS_PER_WORD);
 		}
@@ -774,19 +929,22 @@
 		ucbits = (ucbits - x - 1) % FLAC__BITS_PER_WORD;
 		msbs = x;
 
+		if(x > limit)
+			return false;
+
 		/* read the binary LSBs */
-		x = b >> (FLAC__BITS_PER_WORD - parameter);
+		x = (FLAC__uint32)(b >> (FLAC__BITS_PER_WORD - parameter)); /* parameter < 32, so we can cast to 32-bit uint32_t */
 		if(parameter <= ucbits) {
 			ucbits -= parameter;
 			b <<= parameter;
 		} else {
 			/* there are still bits left to read, they will all be in the next word */
-			crc16_update_word_(br, br->buffer[cwords++]);
+			cwords++;
 			if (cwords >= words)
 				goto incomplete_lsbs;
 			b = br->buffer[cwords];
 			ucbits += FLAC__BITS_PER_WORD - parameter;
-			x |= b >> ucbits;
+			x |= (FLAC__uint32)(b >> ucbits);
 			b <<= FLAC__BITS_PER_WORD - ucbits;
 		}
 		lsbs = x;
@@ -831,13 +989,13 @@
 			cwords = br->consumed_words;
 			words = br->words;
 			ucbits = FLAC__BITS_PER_WORD - br->consumed_bits;
-			b = br->buffer[cwords] << br->consumed_bits;
+			b = cwords < br->capacity ? br->buffer[cwords] << br->consumed_bits : 0;
 		} while(cwords >= words && val < end);
 	}
 
 	if(ucbits == 0 && cwords < words) {
 		/* don't leave the head word with no unconsumed bits */
-		crc16_update_word_(br, br->buffer[cwords++]);
+		cwords++;
 		ucbits = FLAC__BITS_PER_WORD;
 	}
 
@@ -848,10 +1006,10 @@
 }
 
 #if 0 /* UNUSED */
-FLAC__bool FLAC__bitreader_read_golomb_signed(FLAC__BitReader *br, int *val, unsigned parameter)
+FLAC__bool FLAC__bitreader_read_golomb_signed(FLAC__BitReader *br, int *val, uint32_t parameter)
 {
 	FLAC__uint32 lsbs = 0, msbs = 0;
-	unsigned bit, uval, k;
+	uint32_t bit, uval, k;
 
 	FLAC__ASSERT(0 != br);
 	FLAC__ASSERT(0 != br->buffer);
@@ -871,7 +1029,7 @@
 		uval = (msbs << k) | lsbs;
 	}
 	else {
-		unsigned d = (1 << (k+1)) - parameter;
+		uint32_t d = (1 << (k+1)) - parameter;
 		if(lsbs >= d) {
 			if(!FLAC__bitreader_read_bit(br, &bit))
 				return false;
@@ -883,7 +1041,7 @@
 		uval = msbs * parameter + lsbs;
 	}
 
-	/* unfold unsigned to signed */
+	/* unfold uint32_t to signed */
 	if(uval & 1)
 		*val = -((int)(uval >> 1)) - 1;
 	else
@@ -892,10 +1050,10 @@
 	return true;
 }
 
-FLAC__bool FLAC__bitreader_read_golomb_unsigned(FLAC__BitReader *br, unsigned *val, unsigned parameter)
+FLAC__bool FLAC__bitreader_read_golomb_unsigned(FLAC__BitReader *br, uint32_t *val, uint32_t parameter)
 {
 	FLAC__uint32 lsbs, msbs = 0;
-	unsigned bit, k;
+	uint32_t bit, k;
 
 	FLAC__ASSERT(0 != br);
 	FLAC__ASSERT(0 != br->buffer);
@@ -915,7 +1073,7 @@
 		*val = (msbs << k) | lsbs;
 	}
 	else {
-		unsigned d = (1 << (k+1)) - parameter;
+		uint32_t d = (1 << (k+1)) - parameter;
 		if(lsbs >= d) {
 			if(!FLAC__bitreader_read_bit(br, &bit))
 				return false;
@@ -932,11 +1090,11 @@
 #endif /* UNUSED */
 
 /* on return, if *val == 0xffffffff then the utf-8 sequence was invalid, but the return value will be true */
-FLAC__bool FLAC__bitreader_read_utf8_uint32(FLAC__BitReader *br, FLAC__uint32 *val, FLAC__byte *raw, unsigned *rawlen)
+FLAC__bool FLAC__bitreader_read_utf8_uint32(FLAC__BitReader *br, FLAC__uint32 *val, FLAC__byte *raw, uint32_t *rawlen)
 {
 	FLAC__uint32 v = 0;
 	FLAC__uint32 x;
-	unsigned i;
+	uint32_t i;
 
 	if(!FLAC__bitreader_read_raw_uint32(br, &x, 8))
 		return false;
@@ -987,11 +1145,11 @@
 }
 
 /* on return, if *val == 0xffffffffffffffff then the utf-8 sequence was invalid, but the return value will be true */
-FLAC__bool FLAC__bitreader_read_utf8_uint64(FLAC__BitReader *br, FLAC__uint64 *val, FLAC__byte *raw, unsigned *rawlen)
+FLAC__bool FLAC__bitreader_read_utf8_uint64(FLAC__BitReader *br, FLAC__uint64 *val, FLAC__byte *raw, uint32_t *rawlen)
 {
 	FLAC__uint64 v = 0;
 	FLAC__uint32 x;
-	unsigned i;
+	uint32_t i;
 
 	if(!FLAC__bitreader_read_raw_uint32(br, &x, 8))
 		return false;
@@ -1054,6 +1212,6 @@
  * fix that we add extern declarations here.
  */
 extern FLAC__bool FLAC__bitreader_is_consumed_byte_aligned(const FLAC__BitReader *br);
-extern unsigned FLAC__bitreader_bits_left_for_byte_alignment(const FLAC__BitReader *br);
-extern unsigned FLAC__bitreader_get_input_bits_unconsumed(const FLAC__BitReader *br);
+extern uint32_t FLAC__bitreader_bits_left_for_byte_alignment(const FLAC__BitReader *br);
+extern uint32_t FLAC__bitreader_get_input_bits_unconsumed(const FLAC__BitReader *br);
 extern FLAC__bool FLAC__bitreader_read_uint32_little_endian(FLAC__BitReader *br, FLAC__uint32 *val);

diff --git a/src/libFLAC/bitwriter.c b/src/libFLAC/bitwriter.c
index 9a9fc98..8865a2f 100644
--- a/src/libFLAC/bitwriter.c
+++ b/src/libFLAC/bitwriter.c

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -38,7 +38,9 @@
 #include <string.h>
 #include "private/bitwriter.h"
 #include "private/crc.h"
+#include "private/format.h"
 #include "private/macros.h"
+#include "private/stream_encoder.h"
 #include "FLAC/assert.h"
 #include "share/alloc.h"
 #include "share/compat.h"
@@ -46,46 +48,63 @@
 
 /* Things should be fastest when this matches the machine word size */
 /* WATCHOUT: if you change this you must also change the following #defines down to SWAP_BE_WORD_TO_HOST below to match */
-/* WATCHOUT: there are a few places where the code will not work unless uint32_t is >= 32 bits wide */
-#define FLAC__BYTES_PER_WORD 4
+/* WATCHOUT: there are a few places where the code will not work unless bwword is >= 32 bits wide */
+
+#if (ENABLE_64_BIT_WORDS == 0)
+
+typedef FLAC__uint32 bwword;
+#define FLAC__BYTES_PER_WORD 4		/* sizeof bwword */
 #define FLAC__BITS_PER_WORD 32
-#define FLAC__WORD_ALL_ONES ((FLAC__uint32)0xffffffff)
-/* SWAP_BE_WORD_TO_HOST swaps bytes in a uint32_t (which is always big-endian) if necessary to match host byte order */
+/* SWAP_BE_WORD_TO_HOST swaps bytes in a bwword (which is always big-endian) if necessary to match host byte order */
 #if WORDS_BIGENDIAN
 #define SWAP_BE_WORD_TO_HOST(x) (x)
 #else
 #define SWAP_BE_WORD_TO_HOST(x) ENDSWAP_32(x)
 #endif
 
+#else
+
+typedef FLAC__uint64 bwword;
+#define FLAC__BYTES_PER_WORD 8		/* sizeof bwword */
+#define FLAC__BITS_PER_WORD 64
+/* SWAP_BE_WORD_TO_HOST swaps bytes in a bwword (which is always big-endian) if necessary to match host byte order */
+#if WORDS_BIGENDIAN
+#define SWAP_BE_WORD_TO_HOST(x) (x)
+#else
+#define SWAP_BE_WORD_TO_HOST(x) ENDSWAP_64(x)
+#endif
+
+#endif
+
 /*
  * The default capacity here doesn't matter too much.  The buffer always grows
  * to hold whatever is written to it.  Usually the encoder will stop adding at
  * a frame or metadata block, then write that out and clear the buffer for the
  * next one.
  */
-static const unsigned FLAC__BITWRITER_DEFAULT_CAPACITY = 32768u / sizeof(uint32_t); /* size in words */
+static const uint32_t FLAC__BITWRITER_DEFAULT_CAPACITY = 32768u / sizeof(bwword); /* size in words */
 /* When growing, increment 4K at a time */
-static const unsigned FLAC__BITWRITER_DEFAULT_INCREMENT = 4096u / sizeof(uint32_t); /* size in words */
+static const uint32_t FLAC__BITWRITER_DEFAULT_INCREMENT = 4096u / sizeof(bwword); /* size in words */
 
 #define FLAC__WORDS_TO_BITS(words) ((words) * FLAC__BITS_PER_WORD)
 #define FLAC__TOTAL_BITS(bw) (FLAC__WORDS_TO_BITS((bw)->words) + (bw)->bits)
 
 struct FLAC__BitWriter {
-	uint32_t *buffer;
-	uint32_t accum; /* accumulator; bits are right-justified; when full, accum is appended to buffer */
-	unsigned capacity; /* capacity of buffer in words */
-	unsigned words; /* # of complete words in buffer */
-	unsigned bits; /* # of used bits in accum */
+	bwword *buffer;
+	bwword accum; /* accumulator; bits are right-justified; when full, accum is appended to buffer */
+	uint32_t capacity; /* capacity of buffer in words */
+	uint32_t words; /* # of complete words in buffer */
+	uint32_t bits; /* # of used bits in accum */
 };
 
 /* * WATCHOUT: The current implementation only grows the buffer. */
 #ifndef __SUNPRO_C
 static
 #endif
-FLAC__bool bitwriter_grow_(FLAC__BitWriter *bw, unsigned bits_to_add)
+FLAC__bool bitwriter_grow_(FLAC__BitWriter *bw, uint32_t bits_to_add)
 {
-	unsigned new_capacity;
-	uint32_t *new_buffer;
+	uint32_t new_capacity;
+	bwword *new_buffer;
 
 	FLAC__ASSERT(0 != bw);
 	FLAC__ASSERT(0 != bw->buffer);
@@ -99,6 +118,13 @@
 	if(bw->capacity >= new_capacity)
 		return true;
 
+	if(new_capacity * sizeof(bwword) > (1u << FLAC__STREAM_METADATA_LENGTH_LEN))
+		/* Requested new capacity is larger than the largest possible metadata block,
+		 * which is also larger than the largest sane framesize. That means something
+		 * went very wrong somewhere and previous checks failed.
+		 * To prevent crashing, give up */
+		return false;
+
 	/* round up capacity increase to the nearest FLAC__BITWRITER_DEFAULT_INCREMENT */
 	if((new_capacity - bw->capacity) % FLAC__BITWRITER_DEFAULT_INCREMENT)
 		new_capacity += FLAC__BITWRITER_DEFAULT_INCREMENT - ((new_capacity - bw->capacity) % FLAC__BITWRITER_DEFAULT_INCREMENT);
@@ -107,7 +133,7 @@
 	FLAC__ASSERT(new_capacity > bw->capacity);
 	FLAC__ASSERT(new_capacity >= bw->words + ((bw->bits + bits_to_add + FLAC__BITS_PER_WORD - 1) / FLAC__BITS_PER_WORD));
 
-	new_buffer = safe_realloc_mul_2op_(bw->buffer, sizeof(uint32_t), /*times*/new_capacity);
+	new_buffer = safe_realloc_nofree_mul_2op_(bw->buffer, sizeof(bwword), /*times*/new_capacity);
 	if(new_buffer == 0)
 		return false;
 	bw->buffer = new_buffer;
@@ -149,7 +175,7 @@
 
 	bw->words = bw->bits = 0;
 	bw->capacity = FLAC__BITWRITER_DEFAULT_CAPACITY;
-	bw->buffer = malloc(sizeof(uint32_t) * bw->capacity);
+	bw->buffer = malloc(sizeof(bwword) * bw->capacity);
 	if(bw->buffer == 0)
 		return false;
 
@@ -174,7 +200,7 @@
 
 void FLAC__bitwriter_dump(const FLAC__BitWriter *bw, FILE *out)
 {
-	unsigned i, j;
+	uint32_t i, j;
 	if(bw == 0) {
 		fprintf(out, "bitwriter is NULL\n");
 	}
@@ -184,13 +210,13 @@
 		for(i = 0; i < bw->words; i++) {
 			fprintf(out, "%08X: ", i);
 			for(j = 0; j < FLAC__BITS_PER_WORD; j++)
-				fprintf(out, "%01u", bw->buffer[i] & (1 << (FLAC__BITS_PER_WORD-j-1)) ? 1:0);
+				fprintf(out, "%01d", bw->buffer[i] & ((bwword)1 << (FLAC__BITS_PER_WORD-j-1)) ? 1:0);
 			fprintf(out, "\n");
 		}
 		if(bw->bits > 0) {
 			fprintf(out, "%08X: ", i);
 			for(j = 0; j < bw->bits; j++)
-				fprintf(out, "%01u", bw->accum & (1 << (bw->bits-j-1)) ? 1:0);
+				fprintf(out, "%01d", bw->accum & ((bwword)1 << (bw->bits-j-1)) ? 1:0);
 			fprintf(out, "\n");
 		}
 	}
@@ -231,7 +257,7 @@
 	return ((bw->bits & 7) == 0);
 }
 
-unsigned FLAC__bitwriter_get_input_bits_unconsumed(const FLAC__BitWriter *bw)
+uint32_t FLAC__bitwriter_get_input_bits_unconsumed(const FLAC__BitWriter *bw)
 {
 	return FLAC__TOTAL_BITS(bw);
 }
@@ -264,9 +290,9 @@
 	(void)bw;
 }
 
-inline FLAC__bool FLAC__bitwriter_write_zeroes(FLAC__BitWriter *bw, unsigned bits)
+inline FLAC__bool FLAC__bitwriter_write_zeroes(FLAC__BitWriter *bw, uint32_t bits)
 {
-	unsigned n;
+	uint32_t n;
 
 	FLAC__ASSERT(0 != bw);
 	FLAC__ASSERT(0 != bw->buffer);
@@ -302,20 +328,24 @@
 	return true;
 }
 
-inline FLAC__bool FLAC__bitwriter_write_raw_uint32(FLAC__BitWriter *bw, FLAC__uint32 val, unsigned bits)
+static inline FLAC__bool FLAC__bitwriter_write_raw_uint32_nocheck(FLAC__BitWriter *bw, FLAC__uint32 val, uint32_t bits)
 {
-	register unsigned left;
+	register uint32_t left;
 
 	/* WATCHOUT: code does not work with <32bit words; we can make things much faster with this assertion */
 	FLAC__ASSERT(FLAC__BITS_PER_WORD >= 32);
 
-	FLAC__ASSERT(0 != bw);
-	FLAC__ASSERT(0 != bw->buffer);
+	if(bw == 0 || bw->buffer == 0)
+		return false;
 
-	FLAC__ASSERT(bits <= 32);
+	if (bits > 32)
+		return false;
+
 	if(bits == 0)
 		return true;
 
+	FLAC__ASSERT((bits == 32) || (val>>bits == 0));
+
 	/* slightly pessimistic size check but faster than "<= bw->words + (bw->bits+bits+FLAC__BITS_PER_WORD-1)/FLAC__BITS_PER_WORD" */
 	if(bw->capacity <= bw->words + bits && !bitwriter_grow_(bw, bits))
 		return false;
@@ -330,102 +360,124 @@
 		bw->accum <<= left;
 		bw->accum |= val >> (bw->bits = bits - left);
 		bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum);
-		bw->accum = val;
+		bw->accum = val; /* unused top bits can contain garbage */
 	}
-	else {
-		bw->accum = val;
-		bw->bits = 0;
-		bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(val);
+	else { /* at this point bits == FLAC__BITS_PER_WORD == 32  and  bw->bits == 0 */
+		bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST((bwword)val);
 	}
 
 	return true;
 }
 
-inline FLAC__bool FLAC__bitwriter_write_raw_int32(FLAC__BitWriter *bw, FLAC__int32 val, unsigned bits)
+inline FLAC__bool FLAC__bitwriter_write_raw_uint32(FLAC__BitWriter *bw, FLAC__uint32 val, uint32_t bits)
+{
+	/* check that unused bits are unset */
+	if((bits < 32) && (val>>bits != 0))
+		return false;
+
+	return FLAC__bitwriter_write_raw_uint32_nocheck(bw, val, bits);
+}
+
+inline FLAC__bool FLAC__bitwriter_write_raw_int32(FLAC__BitWriter *bw, FLAC__int32 val, uint32_t bits)
 {
 	/* zero-out unused bits */
 	if(bits < 32)
 		val &= (~(0xffffffff << bits));
 
-	return FLAC__bitwriter_write_raw_uint32(bw, (FLAC__uint32)val, bits);
+	return FLAC__bitwriter_write_raw_uint32_nocheck(bw, (FLAC__uint32)val, bits);
 }
 
-inline FLAC__bool FLAC__bitwriter_write_raw_uint64(FLAC__BitWriter *bw, FLAC__uint64 val, unsigned bits)
+inline FLAC__bool FLAC__bitwriter_write_raw_uint64(FLAC__BitWriter *bw, FLAC__uint64 val, uint32_t bits)
 {
 	/* this could be a little faster but it's not used for much */
 	if(bits > 32) {
 		return
 			FLAC__bitwriter_write_raw_uint32(bw, (FLAC__uint32)(val>>32), bits-32) &&
-			FLAC__bitwriter_write_raw_uint32(bw, (FLAC__uint32)val, 32);
+			FLAC__bitwriter_write_raw_uint32_nocheck(bw, (FLAC__uint32)val, 32);
 	}
 	else
 		return FLAC__bitwriter_write_raw_uint32(bw, (FLAC__uint32)val, bits);
 }
 
+inline FLAC__bool FLAC__bitwriter_write_raw_int64(FLAC__BitWriter *bw, FLAC__int64 val, uint32_t bits)
+{
+	FLAC__uint64 uval = val;
+	/* zero-out unused bits */
+	if(bits < 64)
+		uval &= (~(UINT64_MAX << bits));
+	return FLAC__bitwriter_write_raw_uint64(bw, uval, bits);
+}
+
 inline FLAC__bool FLAC__bitwriter_write_raw_uint32_little_endian(FLAC__BitWriter *bw, FLAC__uint32 val)
 {
 	/* this doesn't need to be that fast as currently it is only used for vorbis comments */
 
-	if(!FLAC__bitwriter_write_raw_uint32(bw, val & 0xff, 8))
+	if(!FLAC__bitwriter_write_raw_uint32_nocheck(bw, val & 0xff, 8))
 		return false;
-	if(!FLAC__bitwriter_write_raw_uint32(bw, (val>>8) & 0xff, 8))
+	if(!FLAC__bitwriter_write_raw_uint32_nocheck(bw, (val>>8) & 0xff, 8))
 		return false;
-	if(!FLAC__bitwriter_write_raw_uint32(bw, (val>>16) & 0xff, 8))
+	if(!FLAC__bitwriter_write_raw_uint32_nocheck(bw, (val>>16) & 0xff, 8))
 		return false;
-	if(!FLAC__bitwriter_write_raw_uint32(bw, val>>24, 8))
+	if(!FLAC__bitwriter_write_raw_uint32_nocheck(bw, val>>24, 8))
 		return false;
 
 	return true;
 }
 
-inline FLAC__bool FLAC__bitwriter_write_byte_block(FLAC__BitWriter *bw, const FLAC__byte vals[], unsigned nvals)
+inline FLAC__bool FLAC__bitwriter_write_byte_block(FLAC__BitWriter *bw, const FLAC__byte vals[], uint32_t nvals)
 {
-	unsigned i;
+	uint32_t i;
+
+	/* grow capacity upfront to prevent constant reallocation during writes */
+	if(bw->capacity <= bw->words + nvals / (FLAC__BITS_PER_WORD / 8) + 1 && !bitwriter_grow_(bw, nvals * 8))
+		return false;
 
 	/* this could be faster but currently we don't need it to be since it's only used for writing metadata */
 	for(i = 0; i < nvals; i++) {
-		if(!FLAC__bitwriter_write_raw_uint32(bw, (FLAC__uint32)(vals[i]), 8))
+		if(!FLAC__bitwriter_write_raw_uint32_nocheck(bw, (FLAC__uint32)(vals[i]), 8))
 			return false;
 	}
 
 	return true;
 }
 
-FLAC__bool FLAC__bitwriter_write_unary_unsigned(FLAC__BitWriter *bw, unsigned val)
+FLAC__bool FLAC__bitwriter_write_unary_unsigned(FLAC__BitWriter *bw, uint32_t val)
 {
 	if(val < 32)
-		return FLAC__bitwriter_write_raw_uint32(bw, 1, ++val);
+		return FLAC__bitwriter_write_raw_uint32_nocheck(bw, 1, ++val);
 	else
 		return
 			FLAC__bitwriter_write_zeroes(bw, val) &&
-			FLAC__bitwriter_write_raw_uint32(bw, 1, 1);
+			FLAC__bitwriter_write_raw_uint32_nocheck(bw, 1, 1);
 }
 
-unsigned FLAC__bitwriter_rice_bits(FLAC__int32 val, unsigned parameter)
+uint32_t FLAC__bitwriter_rice_bits(FLAC__int32 val, uint32_t parameter)
 {
 	FLAC__uint32 uval;
 
-	FLAC__ASSERT(parameter < sizeof(unsigned)*8);
+	FLAC__ASSERT(parameter < 32);
 
-	/* fold signed to unsigned; actual formula is: negative(v)? -2v-1 : 2v */
-	uval = (val<<1) ^ (val>>31);
+	/* fold signed to uint32_t; actual formula is: negative(v)? -2v-1 : 2v */
+	uval = val;
+	uval <<= 1;
+	uval ^= (val>>31);
 
 	return 1 + parameter + (uval >> parameter);
 }
 
 #if 0 /* UNUSED */
-unsigned FLAC__bitwriter_golomb_bits_signed(int val, unsigned parameter)
+uint32_t FLAC__bitwriter_golomb_bits_signed(int val, uint32_t parameter)
 {
-	unsigned bits, msbs, uval;
-	unsigned k;
+	uint32_t bits, msbs, uval;
+	uint32_t k;
 
 	FLAC__ASSERT(parameter > 0);
 
-	/* fold signed to unsigned */
+	/* fold signed to uint32_t */
 	if(val < 0)
-		uval = (unsigned)(((-(++val)) << 1) + 1);
+		uval = (uint32_t)(((-(++val)) << 1) + 1);
 	else
-		uval = (unsigned)(val << 1);
+		uval = (uint32_t)(val << 1);
 
 	k = FLAC__bitmath_ilog2(parameter);
 	if(parameter == 1u<<k) {
@@ -435,7 +487,7 @@
 		bits = 1 + k + msbs;
 	}
 	else {
-		unsigned q, r, d;
+		uint32_t q, r, d;
 
 		d = (1 << (k+1)) - parameter;
 		q = uval / parameter;
@@ -448,10 +500,10 @@
 	return bits;
 }
 
-unsigned FLAC__bitwriter_golomb_bits_unsigned(unsigned uval, unsigned parameter)
+uint32_t FLAC__bitwriter_golomb_bits_unsigned(uint32_t uval, uint32_t parameter)
 {
-	unsigned bits, msbs;
-	unsigned k;
+	uint32_t bits, msbs;
+	uint32_t k;
 
 	FLAC__ASSERT(parameter > 0);
 
@@ -463,7 +515,7 @@
 		bits = 1 + k + msbs;
 	}
 	else {
-		unsigned q, r, d;
+		uint32_t q, r, d;
 
 		d = (1 << (k+1)) - parameter;
 		q = uval / parameter;
@@ -477,17 +529,19 @@
 }
 #endif /* UNUSED */
 
-FLAC__bool FLAC__bitwriter_write_rice_signed(FLAC__BitWriter *bw, FLAC__int32 val, unsigned parameter)
+FLAC__bool FLAC__bitwriter_write_rice_signed(FLAC__BitWriter *bw, FLAC__int32 val, uint32_t parameter)
 {
-	unsigned total_bits, interesting_bits, msbs;
+	uint32_t total_bits, interesting_bits, msbs;
 	FLAC__uint32 uval, pattern;
 
 	FLAC__ASSERT(0 != bw);
 	FLAC__ASSERT(0 != bw->buffer);
-	FLAC__ASSERT(parameter < 8*sizeof(uval));
+	FLAC__ASSERT(parameter < 32);
 
-	/* fold signed to unsigned; actual formula is: negative(v)? -2v-1 : 2v */
-	uval = (val<<1) ^ (val>>31);
+	/* fold signed to uint32_t; actual formula is: negative(v)? -2v-1 : 2v */
+	uval = val;
+	uval <<= 1;
+	uval ^= (val>>31);
 
 	msbs = uval >> parameter;
 	interesting_bits = 1 + parameter;
@@ -503,39 +557,42 @@
 			FLAC__bitwriter_write_raw_uint32(bw, pattern, interesting_bits); /* write the unary end bit and binary LSBs */
 }
 
-FLAC__bool FLAC__bitwriter_write_rice_signed_block(FLAC__BitWriter *bw, const FLAC__int32 *vals, unsigned nvals, unsigned parameter)
+FLAC__bool FLAC__bitwriter_write_rice_signed_block(FLAC__BitWriter *bw, const FLAC__int32 *vals, uint32_t nvals, uint32_t parameter)
 {
-	const FLAC__uint32 mask1 = FLAC__WORD_ALL_ONES << parameter; /* we val|=mask1 to set the stop bit above it... */
-	const FLAC__uint32 mask2 = FLAC__WORD_ALL_ONES >> (31-parameter); /* ...then mask off the bits above the stop bit with val&=mask2*/
+	const FLAC__uint32 mask1 = (FLAC__uint32)0xffffffff << parameter; /* we val|=mask1 to set the stop bit above it... */
+	const FLAC__uint32 mask2 = (FLAC__uint32)0xffffffff >> (31-parameter); /* ...then mask off the bits above the stop bit with val&=mask2 */
 	FLAC__uint32 uval;
-	unsigned left;
-	const unsigned lsbits = 1 + parameter;
-	unsigned msbits;
+	uint32_t left;
+	const uint32_t lsbits = 1 + parameter;
+	uint32_t msbits, total_bits;
 
 	FLAC__ASSERT(0 != bw);
 	FLAC__ASSERT(0 != bw->buffer);
-	FLAC__ASSERT(parameter < 8*sizeof(uint32_t)-1);
+	FLAC__ASSERT(parameter < 31);
 	/* WATCHOUT: code does not work with <32bit words; we can make things much faster with this assertion */
 	FLAC__ASSERT(FLAC__BITS_PER_WORD >= 32);
 
 	while(nvals) {
-		/* fold signed to unsigned; actual formula is: negative(v)? -2v-1 : 2v */
-		uval = (*vals<<1) ^ (*vals>>31);
+		/* fold signed to uint32_t; actual formula is: negative(v)? -2v-1 : 2v */
+		uval = *vals;
+		uval <<= 1;
+		uval ^= (*vals>>31);
 
 		msbits = uval >> parameter;
+		total_bits = lsbits + msbits;
 
-		if(bw->bits && bw->bits + msbits + lsbits < FLAC__BITS_PER_WORD) { /* i.e. if the whole thing fits in the current uint32_t */
-			/* ^^^ if bw->bits is 0 then we may have filled the buffer and have no free uint32_t to work in */
-			bw->bits = bw->bits + msbits + lsbits;
+		if(bw->bits && bw->bits + total_bits < FLAC__BITS_PER_WORD) { /* i.e. if the whole thing fits in the current bwword */
+			/* ^^^ if bw->bits is 0 then we may have filled the buffer and have no free bwword to work in */
+			bw->bits += total_bits;
 			uval |= mask1; /* set stop bit */
 			uval &= mask2; /* mask off unused top bits */
-			bw->accum <<= msbits + lsbits;
+			bw->accum <<= total_bits;
 			bw->accum |= uval;
 		}
 		else {
 			/* slightly pessimistic size check but faster than "<= bw->words + (bw->bits+msbits+lsbits+FLAC__BITS_PER_WORD-1)/FLAC__BITS_PER_WORD" */
 			/* OPT: pessimism may cause flurry of false calls to grow_ which eat up all savings before it */
-			if(bw->capacity <= bw->words + bw->bits + msbits + 1/*lsbits always fit in 1 uint32_t*/ && !bitwriter_grow_(bw, msbits+lsbits))
+			if(bw->capacity <= bw->words + bw->bits + msbits + 1 /* lsbits always fit in 1 bwword */ && !bitwriter_grow_(bw, total_bits))
 				return false;
 
 			if(msbits) {
@@ -585,7 +642,7 @@
 				bw->accum <<= left;
 				bw->accum |= uval >> (bw->bits = lsbits - left);
 				bw->buffer[bw->words++] = SWAP_BE_WORD_TO_HOST(bw->accum);
-				bw->accum = uval;
+				bw->accum = uval; /* unused top bits can contain garbage */
 			}
 		}
 		vals++;
@@ -595,24 +652,24 @@
 }
 
 #if 0 /* UNUSED */
-FLAC__bool FLAC__bitwriter_write_golomb_signed(FLAC__BitWriter *bw, int val, unsigned parameter)
+FLAC__bool FLAC__bitwriter_write_golomb_signed(FLAC__BitWriter *bw, int val, uint32_t parameter)
 {
-	unsigned total_bits, msbs, uval;
-	unsigned k;
+	uint32_t total_bits, msbs, uval;
+	uint32_t k;
 
 	FLAC__ASSERT(0 != bw);
 	FLAC__ASSERT(0 != bw->buffer);
 	FLAC__ASSERT(parameter > 0);
 
-	/* fold signed to unsigned */
+	/* fold signed to uint32_t */
 	if(val < 0)
-		uval = (unsigned)(((-(++val)) << 1) + 1);
+		uval = (uint32_t)(((-(++val)) << 1) + 1);
 	else
-		uval = (unsigned)(val << 1);
+		uval = (uint32_t)(val << 1);
 
 	k = FLAC__bitmath_ilog2(parameter);
 	if(parameter == 1u<<k) {
-		unsigned pattern;
+		uint32_t pattern;
 
 		FLAC__ASSERT(k <= 30);
 
@@ -635,7 +692,7 @@
 		}
 	}
 	else {
-		unsigned q, r, d;
+		uint32_t q, r, d;
 
 		d = (1 << (k+1)) - parameter;
 		q = uval / parameter;
@@ -659,10 +716,10 @@
 	return true;
 }
 
-FLAC__bool FLAC__bitwriter_write_golomb_unsigned(FLAC__BitWriter *bw, unsigned uval, unsigned parameter)
+FLAC__bool FLAC__bitwriter_write_golomb_unsigned(FLAC__BitWriter *bw, uint32_t uval, uint32_t parameter)
 {
-	unsigned total_bits, msbs;
-	unsigned k;
+	uint32_t total_bits, msbs;
+	uint32_t k;
 
 	FLAC__ASSERT(0 != bw);
 	FLAC__ASSERT(0 != bw->buffer);
@@ -670,7 +727,7 @@
 
 	k = FLAC__bitmath_ilog2(parameter);
 	if(parameter == 1u<<k) {
-		unsigned pattern;
+		uint32_t pattern;
 
 		FLAC__ASSERT(k <= 30);
 
@@ -693,7 +750,7 @@
 		}
 	}
 	else {
-		unsigned q, r, d;
+		uint32_t q, r, d;
 
 		d = (1 << (k+1)) - parameter;
 		q = uval / parameter;
@@ -725,40 +782,41 @@
 	FLAC__ASSERT(0 != bw);
 	FLAC__ASSERT(0 != bw->buffer);
 
-	FLAC__ASSERT(!(val & 0x80000000)); /* this version only handles 31 bits */
+	if((val & 0x80000000) != 0) /* this version only handles 31 bits */
+		return false;
 
 	if(val < 0x80) {
-		return FLAC__bitwriter_write_raw_uint32(bw, val, 8);
+		return FLAC__bitwriter_write_raw_uint32_nocheck(bw, val, 8);
 	}
 	else if(val < 0x800) {
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0xC0 | (val>>6), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (val&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0xC0 | (val>>6), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (val&0x3F), 8);
 	}
 	else if(val < 0x10000) {
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0xE0 | (val>>12), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | ((val>>6)&0x3F), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (val&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0xE0 | (val>>12), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | ((val>>6)&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (val&0x3F), 8);
 	}
 	else if(val < 0x200000) {
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0xF0 | (val>>18), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | ((val>>12)&0x3F), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | ((val>>6)&0x3F), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (val&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0xF0 | (val>>18), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | ((val>>12)&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | ((val>>6)&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (val&0x3F), 8);
 	}
 	else if(val < 0x4000000) {
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0xF8 | (val>>24), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | ((val>>18)&0x3F), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | ((val>>12)&0x3F), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | ((val>>6)&0x3F), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (val&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0xF8 | (val>>24), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | ((val>>18)&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | ((val>>12)&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | ((val>>6)&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (val&0x3F), 8);
 	}
 	else {
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0xFC | (val>>30), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | ((val>>24)&0x3F), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | ((val>>18)&0x3F), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | ((val>>12)&0x3F), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | ((val>>6)&0x3F), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (val&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0xFC | (val>>30), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | ((val>>24)&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | ((val>>18)&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | ((val>>12)&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | ((val>>6)&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (val&0x3F), 8);
 	}
 
 	return ok;
@@ -771,49 +829,50 @@
 	FLAC__ASSERT(0 != bw);
 	FLAC__ASSERT(0 != bw->buffer);
 
-	FLAC__ASSERT(!(val & FLAC__U64L(0xFFFFFFF000000000))); /* this version only handles 36 bits */
+	if((val & FLAC__U64L(0xFFFFFFF000000000)) != 0) /* this version only handles 36 bits */
+		return false;
 
 	if(val < 0x80) {
-		return FLAC__bitwriter_write_raw_uint32(bw, (FLAC__uint32)val, 8);
+		return FLAC__bitwriter_write_raw_uint32_nocheck(bw, (FLAC__uint32)val, 8);
 	}
 	else if(val < 0x800) {
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0xC0 | (FLAC__uint32)(val>>6), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)(val&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0xC0 | (FLAC__uint32)(val>>6), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)(val&0x3F), 8);
 	}
 	else if(val < 0x10000) {
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0xE0 | (FLAC__uint32)(val>>12), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)((val>>6)&0x3F), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)(val&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0xE0 | (FLAC__uint32)(val>>12), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)((val>>6)&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)(val&0x3F), 8);
 	}
 	else if(val < 0x200000) {
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0xF0 | (FLAC__uint32)(val>>18), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)((val>>12)&0x3F), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)((val>>6)&0x3F), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)(val&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0xF0 | (FLAC__uint32)(val>>18), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)((val>>12)&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)((val>>6)&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)(val&0x3F), 8);
 	}
 	else if(val < 0x4000000) {
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0xF8 | (FLAC__uint32)(val>>24), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)((val>>18)&0x3F), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)((val>>12)&0x3F), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)((val>>6)&0x3F), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)(val&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0xF8 | (FLAC__uint32)(val>>24), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)((val>>18)&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)((val>>12)&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)((val>>6)&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)(val&0x3F), 8);
 	}
 	else if(val < 0x80000000) {
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0xFC | (FLAC__uint32)(val>>30), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)((val>>24)&0x3F), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)((val>>18)&0x3F), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)((val>>12)&0x3F), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)((val>>6)&0x3F), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)(val&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0xFC | (FLAC__uint32)(val>>30), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)((val>>24)&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)((val>>18)&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)((val>>12)&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)((val>>6)&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)(val&0x3F), 8);
 	}
 	else {
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0xFE, 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)((val>>30)&0x3F), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)((val>>24)&0x3F), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)((val>>18)&0x3F), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)((val>>12)&0x3F), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)((val>>6)&0x3F), 8);
-		ok &= FLAC__bitwriter_write_raw_uint32(bw, 0x80 | (FLAC__uint32)(val&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0xFE, 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)((val>>30)&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)((val>>24)&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)((val>>18)&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)((val>>12)&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)((val>>6)&0x3F), 8);
+		ok &= FLAC__bitwriter_write_raw_uint32_nocheck(bw, 0x80 | (FLAC__uint32)(val&0x3F), 8);
 	}
 
 	return ok;
@@ -836,9 +895,10 @@
  * Unfortunately, the Microsoft VS compiler doesn't pick them up externally. To
  * fix that we add extern declarations here.
  */
-extern FLAC__bool FLAC__bitwriter_write_zeroes(FLAC__BitWriter *bw, unsigned bits);
-extern FLAC__bool FLAC__bitwriter_write_raw_int32(FLAC__BitWriter *bw, FLAC__int32 val, unsigned bits);
-extern FLAC__bool FLAC__bitwriter_write_raw_uint32(FLAC__BitWriter *bw, FLAC__uint32 val, unsigned bits);
-extern FLAC__bool FLAC__bitwriter_write_raw_uint64(FLAC__BitWriter *bw, FLAC__uint64 val, unsigned bits);
+extern FLAC__bool FLAC__bitwriter_write_zeroes(FLAC__BitWriter *bw, uint32_t bits);
+extern FLAC__bool FLAC__bitwriter_write_raw_uint32(FLAC__BitWriter *bw, FLAC__uint32 val, uint32_t bits);
+extern FLAC__bool FLAC__bitwriter_write_raw_int32(FLAC__BitWriter *bw, FLAC__int32 val, uint32_t bits);
+extern FLAC__bool FLAC__bitwriter_write_raw_uint64(FLAC__BitWriter *bw, FLAC__uint64 val, uint32_t bits);
+extern FLAC__bool FLAC__bitwriter_write_raw_int64(FLAC__BitWriter *bw, FLAC__int64 val, uint32_t bits);
 extern FLAC__bool FLAC__bitwriter_write_raw_uint32_little_endian(FLAC__BitWriter *bw, FLAC__uint32 val);
-extern FLAC__bool FLAC__bitwriter_write_byte_block(FLAC__BitWriter *bw, const FLAC__byte vals[], unsigned nvals);
+extern FLAC__bool FLAC__bitwriter_write_byte_block(FLAC__BitWriter *bw, const FLAC__byte vals[], uint32_t nvals);

diff --git a/src/libFLAC/cpu.c b/src/libFLAC/cpu.c
index bb09506..cba0ad0 100644
--- a/src/libFLAC/cpu.c
+++ b/src/libFLAC/cpu.c

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2001-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -35,371 +35,9 @@
 #endif
 
 #include "private/cpu.h"
+#include "share/compat.h"
 #include <stdlib.h>
-#include <memory.h>
-#ifdef DEBUG
-# include <stdio.h>
-#endif
-
-#if defined FLAC__CPU_IA32
-# include <signal.h>
-
-static void disable_sse(FLAC__CPUInfo *info)
-{
-	info->ia32.sse   = false;
-	info->ia32.sse2  = false;
-	info->ia32.sse3  = false;
-	info->ia32.ssse3 = false;
-	info->ia32.sse41 = false;
-	info->ia32.sse42 = false;
-}
-
-static void disable_avx(FLAC__CPUInfo *info)
-{
-	info->ia32.avx     = false;
-	info->ia32.avx2    = false;
-	info->ia32.fma     = false;
-}
-
-#elif defined FLAC__CPU_X86_64
-
-static void disable_avx(FLAC__CPUInfo *info)
-{
-	info->x86.avx     = false;
-	info->x86.avx2    = false;
-	info->x86.fma     = false;
-}
-#endif
-
-#if defined (__NetBSD__) || defined(__OpenBSD__)
-#include <sys/param.h>
-#include <sys/sysctl.h>
-#include <machine/cpu.h>
-#endif
-
-#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__)
-#include <sys/types.h>
-#include <sys/sysctl.h>
-#endif
-
-#if defined(__APPLE__)
-/* how to get sysctlbyname()? */
-#endif
-
-#ifdef FLAC__CPU_IA32
-/* these are flags in EDX of CPUID AX=00000001 */
-static const unsigned FLAC__CPUINFO_IA32_CPUID_CMOV = 0x00008000;
-static const unsigned FLAC__CPUINFO_IA32_CPUID_MMX = 0x00800000;
-static const unsigned FLAC__CPUINFO_IA32_CPUID_FXSR = 0x01000000;
-static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE = 0x02000000;
-static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE2 = 0x04000000;
-#endif
-
-/* these are flags in ECX of CPUID AX=00000001 */
-static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE3 = 0x00000001;
-static const unsigned FLAC__CPUINFO_IA32_CPUID_SSSE3 = 0x00000200;
-static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE41 = 0x00080000;
-static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE42 = 0x00100000;
-
-#if defined FLAC__AVX_SUPPORTED
-/* these are flags in ECX of CPUID AX=00000001 */
-static const unsigned FLAC__CPUINFO_IA32_CPUID_OSXSAVE = 0x08000000;
-static const unsigned FLAC__CPUINFO_IA32_CPUID_AVX = 0x10000000;
-static const unsigned FLAC__CPUINFO_IA32_CPUID_FMA = 0x00001000;
-/* these are flags in EBX of CPUID AX=00000007 */
-static const unsigned FLAC__CPUINFO_IA32_CPUID_AVX2 = 0x00000020;
-#endif
-
-/*
- * Extra stuff needed for detection of OS support for SSE on IA-32
- */
-#if defined(FLAC__CPU_IA32) && !defined FLAC__NO_ASM && (defined FLAC__HAS_NASM || defined FLAC__HAS_X86INTRIN) && !defined FLAC__NO_SSE_OS && !defined FLAC__SSE_OS
-# if defined(__linux__)
-/*
- * If the OS doesn't support SSE, we will get here with a SIGILL.  We
- * modify the return address to jump over the offending SSE instruction
- * and also the operation following it that indicates the instruction
- * executed successfully.  In this way we use no global variables and
- * stay thread-safe.
- *
- * 3 + 3 + 6:
- *   3 bytes for "xorps xmm0,xmm0"
- *   3 bytes for estimate of how long the follwing "inc var" instruction is
- *   6 bytes extra in case our estimate is wrong
- * 12 bytes puts us in the NOP "landing zone"
- */
-#   include <sys/ucontext.h>
-	static void sigill_handler_sse_os(int signal, siginfo_t *si, void *uc)
-	{
-		(void)signal, (void)si;
-		((ucontext_t*)uc)->uc_mcontext.gregs[14/*REG_EIP*/] += 3 + 3 + 6;
-	}
-# elif defined(_MSC_VER)
-#  include <windows.h>
-# endif
-#endif
-
-
-void FLAC__cpu_info(FLAC__CPUInfo *info)
-{
-/*
- * IA32-specific
- */
-#ifdef FLAC__CPU_IA32
-	FLAC__bool ia32_fxsr = false;
-	FLAC__bool ia32_osxsave = false;
-	(void) ia32_fxsr; (void) ia32_osxsave; /* to avoid warnings about unused variables */
-	memset(info, 0, sizeof(*info));
-	info->type = FLAC__CPUINFO_TYPE_IA32;
-#if !defined FLAC__NO_ASM && (defined FLAC__HAS_NASM || defined FLAC__HAS_X86INTRIN)
-	info->use_asm = true; /* we assume a minimum of 80386 with FLAC__CPU_IA32 */
-#ifdef FLAC__HAS_X86INTRIN
-	if(!FLAC__cpu_have_cpuid_x86())
-		return;
-#else
-	if(!FLAC__cpu_have_cpuid_asm_ia32())
-		return;
-#endif
-	{
-		/* http://www.sandpile.org/x86/cpuid.htm */
-#ifdef FLAC__HAS_X86INTRIN
-		FLAC__uint32 flags_eax, flags_ebx, flags_ecx, flags_edx;
-		FLAC__cpu_info_x86(1, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx);
-#else
-		FLAC__uint32 flags_ecx, flags_edx;
-		FLAC__cpu_info_asm_ia32(&flags_edx, &flags_ecx);
-#endif
-		info->ia32.cmov  = (flags_edx & FLAC__CPUINFO_IA32_CPUID_CMOV )? true : false;
-		info->ia32.mmx   = (flags_edx & FLAC__CPUINFO_IA32_CPUID_MMX  )? true : false;
-		      ia32_fxsr  = (flags_edx & FLAC__CPUINFO_IA32_CPUID_FXSR )? true : false;
-		info->ia32.sse   = (flags_edx & FLAC__CPUINFO_IA32_CPUID_SSE  )? true : false;
-		info->ia32.sse2  = (flags_edx & FLAC__CPUINFO_IA32_CPUID_SSE2 )? true : false;
-		info->ia32.sse3  = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE3 )? true : false;
-		info->ia32.ssse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSSE3)? true : false;
-		info->ia32.sse41 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE41)? true : false;
-		info->ia32.sse42 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE42)? true : false;
-#if defined FLAC__HAS_X86INTRIN && defined FLAC__AVX_SUPPORTED
-		    ia32_osxsave = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_OSXSAVE)? true : false;
-		info->ia32.avx   = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_AVX    )? true : false;
-		info->ia32.fma   = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_FMA    )? true : false;
-		FLAC__cpu_info_x86(7, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx);
-		info->ia32.avx2  = (flags_ebx & FLAC__CPUINFO_IA32_CPUID_AVX2   )? true : false;
-#endif
-	}
-
-#ifdef DEBUG
-	fprintf(stderr, "CPU info (IA-32):\n");
-	fprintf(stderr, "  CMOV ....... %c\n", info->ia32.cmov    ? 'Y' : 'n');
-	fprintf(stderr, "  MMX ........ %c\n", info->ia32.mmx     ? 'Y' : 'n');
-	fprintf(stderr, "  SSE ........ %c\n", info->ia32.sse     ? 'Y' : 'n');
-	fprintf(stderr, "  SSE2 ....... %c\n", info->ia32.sse2    ? 'Y' : 'n');
-	fprintf(stderr, "  SSE3 ....... %c\n", info->ia32.sse3    ? 'Y' : 'n');
-	fprintf(stderr, "  SSSE3 ...... %c\n", info->ia32.ssse3   ? 'Y' : 'n');
-	fprintf(stderr, "  SSE41 ...... %c\n", info->ia32.sse41   ? 'Y' : 'n');
-	fprintf(stderr, "  SSE42 ...... %c\n", info->ia32.sse42   ? 'Y' : 'n');
-# if defined FLAC__HAS_X86INTRIN && defined FLAC__AVX_SUPPORTED
-	fprintf(stderr, "  AVX ........ %c\n", info->ia32.avx     ? 'Y' : 'n');
-	fprintf(stderr, "  FMA ........ %c\n", info->ia32.fma     ? 'Y' : 'n');
-	fprintf(stderr, "  AVX2 ....... %c\n", info->ia32.avx2    ? 'Y' : 'n');
-# endif
-#endif
-
-	/*
-	 * now have to check for OS support of SSE instructions
-	 */
-	if(info->ia32.sse) {
-#if defined FLAC__NO_SSE_OS
-		/* assume user knows better than us; turn it off */
-		disable_sse(info);
-#elif defined FLAC__SSE_OS
-		/* assume user knows better than us; leave as detected above */
-#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__) || defined(__APPLE__)
-		int sse = 0;
-		size_t len;
-		/* at least one of these must work: */
-		len = sizeof(sse); sse = sse || (sysctlbyname("hw.instruction_sse", &sse, &len, NULL, 0) == 0 && sse);
-		len = sizeof(sse); sse = sse || (sysctlbyname("hw.optional.sse"   , &sse, &len, NULL, 0) == 0 && sse); /* __APPLE__ ? */
-		if(!sse)
-			disable_sse(info);
-#elif defined(__NetBSD__) || defined (__OpenBSD__)
-# if __NetBSD_Version__ >= 105250000 || (defined __OpenBSD__)
-		int val = 0, mib[2] = { CTL_MACHDEP, CPU_SSE };
-		size_t len = sizeof(val);
-		if(sysctl(mib, 2, &val, &len, NULL, 0) < 0 || !val)
-			disable_sse(info);
-		else { /* double-check SSE2 */
-			mib[1] = CPU_SSE2;
-			len = sizeof(val);
-			if(sysctl(mib, 2, &val, &len, NULL, 0) < 0 || !val) {
-				disable_sse(info);
-				info->ia32.sse = true;
-			}
-		}
-# else
-		disable_sse(info);
-# endif
-#elif defined(__linux__)
-		int sse = 0;
-		struct sigaction sigill_save;
-		struct sigaction sigill_sse;
-		sigill_sse.sa_sigaction = sigill_handler_sse_os;
-		__sigemptyset(&sigill_sse.sa_mask);
-		sigill_sse.sa_flags = SA_SIGINFO | SA_RESETHAND; /* SA_RESETHAND just in case our SIGILL return jump breaks, so we don't get stuck in a loop */
-		if(0 == sigaction(SIGILL, &sigill_sse, &sigill_save))
-		{
-			/* http://www.ibiblio.org/gferg/ldp/GCC-Inline-Assembly-HOWTO.html */
-			/* see sigill_handler_sse_os() for an explanation of the following: */
-			asm volatile (
-				"xorps %%xmm0,%%xmm0\n\t" /* will cause SIGILL if unsupported by OS */
-				"incl %0\n\t"             /* SIGILL handler will jump over this */
-				/* landing zone */
-				"nop\n\t" /* SIGILL jump lands here if "inc" is 9 bytes */
-				"nop\n\t"
-				"nop\n\t"
-				"nop\n\t"
-				"nop\n\t"
-				"nop\n\t"
-				"nop\n\t" /* SIGILL jump lands here if "inc" is 3 bytes (expected) */
-				"nop\n\t"
-				"nop"     /* SIGILL jump lands here if "inc" is 1 byte */
-				: "=r"(sse)
-				: "0"(sse)
-			);
-
-			sigaction(SIGILL, &sigill_save, NULL);
-		}
-
-		if(!sse)
-			disable_sse(info);
-#elif defined(_MSC_VER)
-		__try {
-			__asm {
-				xorps xmm0,xmm0
-			}
-		}
-		__except(EXCEPTION_EXECUTE_HANDLER) {
-			if (_exception_code() == STATUS_ILLEGAL_INSTRUCTION)
-				disable_sse(info);
-		}
-#elif defined(__GNUC__) /* MinGW goes here */
-		int sse = 0;
-		/* Based on the idea described in Agner Fog's manual "Optimizing subroutines in assembly language" */
-		/* In theory, not guaranteed to detect lack of OS SSE support on some future Intel CPUs, but in practice works (see the aforementioned manual) */
-		if (ia32_fxsr) {
-			struct {
-				FLAC__uint32 buff[128];
-			} __attribute__((aligned(16))) fxsr;
-			FLAC__uint32 old_val, new_val;
-
-			asm volatile ("fxsave %0"  : "=m" (fxsr) : "m" (fxsr));
-			old_val = fxsr.buff[50];
-			fxsr.buff[50] ^= 0x0013c0de;                             /* change value in the buffer */
-			asm volatile ("fxrstor %0" : "=m" (fxsr) : "m" (fxsr));  /* try to change SSE register */
-			fxsr.buff[50] = old_val;                                 /* restore old value in the buffer */
-			asm volatile ("fxsave %0 " : "=m" (fxsr) : "m" (fxsr));  /* old value will be overwritten if SSE register was changed */
-			new_val = fxsr.buff[50];                                 /* == old_val if FXRSTOR didn't change SSE register and (old_val ^ 0x0013c0de) otherwise */
-			fxsr.buff[50] = old_val;                                 /* again restore old value in the buffer */
-			asm volatile ("fxrstor %0" : "=m" (fxsr) : "m" (fxsr));  /* restore old values of registers */
-
-			if ((old_val^new_val) == 0x0013c0de)
-				sse = 1;
-		}
-		if(!sse)
-			disable_sse(info);
-#else
-		/* no way to test, disable to be safe */
-		disable_sse(info);
-#endif
-#ifdef DEBUG
-		fprintf(stderr, "  SSE OS sup . %c\n", info->ia32.sse ? 'Y' : 'n');
-#endif
-	}
-	else /* info->ia32.sse == false */
-		disable_sse(info);
-
-	/*
-	 * now have to check for OS support of AVX instructions
-	 */
-	if(info->ia32.avx && ia32_osxsave) {
-		FLAC__uint32 ecr = FLAC__cpu_xgetbv_x86();
-		if ((ecr & 0x6) != 0x6)
-			disable_avx(info);
-#ifdef DEBUG
-		fprintf(stderr, "  AVX OS sup . %c\n", info->ia32.avx ? 'Y' : 'n');
-#endif
-	}
-	else /* no OS AVX support*/
-		disable_avx(info);
-#else
-	info->use_asm = false;
-#endif
-
-/*
- * x86-64-specific
- */
-#elif defined FLAC__CPU_X86_64
-	FLAC__bool x86_osxsave = false;
-	(void) x86_osxsave; /* to avoid warnings about unused variables */
-	memset(info, 0, sizeof(*info));
-	info->type = FLAC__CPUINFO_TYPE_X86_64;
-#if !defined FLAC__NO_ASM && defined FLAC__HAS_X86INTRIN
-	info->use_asm = true;
-	{
-		/* http://www.sandpile.org/x86/cpuid.htm */
-		FLAC__uint32 flags_eax, flags_ebx, flags_ecx, flags_edx;
-		FLAC__cpu_info_x86(1, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx);
-		info->x86.sse3  = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE3 )? true : false;
-		info->x86.ssse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSSE3)? true : false;
-		info->x86.sse41 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE41)? true : false;
-		info->x86.sse42 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE42)? true : false;
-#if defined FLAC__AVX_SUPPORTED
-		    x86_osxsave = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_OSXSAVE)? true : false;
-		info->x86.avx   = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_AVX    )? true : false;
-		info->x86.fma   = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_FMA    )? true : false;
-		FLAC__cpu_info_x86(7, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx);
-		info->x86.avx2  = (flags_ebx & FLAC__CPUINFO_IA32_CPUID_AVX2   )? true : false;
-#endif
-	}
-#ifdef DEBUG
-	fprintf(stderr, "CPU info (x86-64):\n");
-	fprintf(stderr, "  SSE3 ....... %c\n", info->x86.sse3  ? 'Y' : 'n');
-	fprintf(stderr, "  SSSE3 ...... %c\n", info->x86.ssse3 ? 'Y' : 'n');
-	fprintf(stderr, "  SSE41 ...... %c\n", info->x86.sse41 ? 'Y' : 'n');
-	fprintf(stderr, "  SSE42 ...... %c\n", info->x86.sse42 ? 'Y' : 'n');
-# if defined FLAC__AVX_SUPPORTED
-	fprintf(stderr, "  AVX ........ %c\n", info->x86.avx   ? 'Y' : 'n');
-	fprintf(stderr, "  FMA ........ %c\n", info->x86.fma   ? 'Y' : 'n');
-	fprintf(stderr, "  AVX2 ....... %c\n", info->x86.avx2  ? 'Y' : 'n');
-# endif
-#endif
-
-	/*
-	 * now have to check for OS support of AVX instructions
-	 */
-	if(info->x86.avx && x86_osxsave) {
-		FLAC__uint32 ecr = FLAC__cpu_xgetbv_x86();
-		if ((ecr & 0x6) != 0x6)
-			disable_avx(info);
-#ifdef DEBUG
-		fprintf(stderr, "  AVX OS sup . %c\n", info->x86.avx ? 'Y' : 'n');
-#endif
-	}
-	else /* no OS AVX support*/
-		disable_avx(info);
-#else
-	info->use_asm = false;
-#endif
-
-/*
- * unknown CPU
- */
-#else
-	info->type = FLAC__CPUINFO_TYPE_UNKNOWN;
-	info->use_asm = false;
-#endif
-}
-
-#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
+#include <string.h>
 
 #if defined _MSC_VER
 #include <intrin.h> /* for __cpuid() and _xgetbv() */
@@ -407,12 +45,64 @@
 #include <cpuid.h> /* for __get_cpuid() and __get_cpuid_max() */
 #endif
 
-FLAC__uint32 FLAC__cpu_have_cpuid_x86(void)
-{
-#ifdef FLAC__CPU_X86_64
-	return 1;
+#ifndef NDEBUG
+#include <stdio.h>
+#define dfprintf fprintf
 #else
-# if defined _MSC_VER || defined __INTEL_COMPILER /* Do they support CPUs w/o CPUID support (or OSes that work on those CPUs)? */
+/* This is bad practice; it should be an empty static void function */
+#define dfprintf(file, format, ...)
+#endif
+
+#if defined(HAVE_SYS_AUXV_H)
+#include <sys/auxv.h>
+#endif
+
+#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN && !defined FLAC__NO_ASM
+
+/* these are flags in EDX of CPUID AX=00000001 */
+static const uint32_t FLAC__CPUINFO_X86_CPUID_CMOV    = 0x00008000;
+static const uint32_t FLAC__CPUINFO_X86_CPUID_MMX     = 0x00800000;
+static const uint32_t FLAC__CPUINFO_X86_CPUID_SSE     = 0x02000000;
+static const uint32_t FLAC__CPUINFO_X86_CPUID_SSE2    = 0x04000000;
+
+/* these are flags in ECX of CPUID AX=00000001 */
+static const uint32_t FLAC__CPUINFO_X86_CPUID_SSE3    = 0x00000001;
+static const uint32_t FLAC__CPUINFO_X86_CPUID_SSSE3   = 0x00000200;
+static const uint32_t FLAC__CPUINFO_X86_CPUID_SSE41   = 0x00080000;
+static const uint32_t FLAC__CPUINFO_X86_CPUID_SSE42   = 0x00100000;
+static const uint32_t FLAC__CPUINFO_X86_CPUID_OSXSAVE = 0x08000000;
+static const uint32_t FLAC__CPUINFO_X86_CPUID_AVX     = 0x10000000;
+static const uint32_t FLAC__CPUINFO_X86_CPUID_FMA     = 0x00001000;
+
+/* these are flags in EBX of CPUID AX=00000007 */
+static const uint32_t FLAC__CPUINFO_X86_CPUID_AVX2    = 0x00000020;
+
+static uint32_t
+cpu_xgetbv_x86(void)
+{
+#if (defined _MSC_VER || defined __INTEL_COMPILER) && FLAC__AVX_SUPPORTED
+	return (uint32_t)_xgetbv(0);
+#elif defined __GNUC__
+	uint32_t lo, hi;
+	__asm__ volatile (".byte 0x0f, 0x01, 0xd0" : "=a"(lo), "=d"(hi) : "c" (0));
+	return lo;
+#else
+	return 0;
+#endif
+}
+
+static uint32_t
+cpu_have_cpuid(void)
+{
+#if defined FLAC__CPU_X86_64 || defined __i686__ || defined __SSE__ || (defined _M_IX86_FP && _M_IX86_FP > 0)
+	/* target CPU does have CPUID instruction */
+	return 1;
+#elif defined __GNUC__ && defined HAVE_CPUID_H
+	if (__get_cpuid_max(0, 0) != 0)
+		return 1;
+	else
+		return 0;
+#elif defined _MSC_VER
 	FLAC__uint32 flags1, flags2;
 	__asm {
 		pushfd
@@ -431,57 +121,178 @@
 		return 1;
 	else
 		return 0;
-# elif defined __GNUC__ && defined HAVE_CPUID_H
-	if (__get_cpuid_max(0, 0) != 0)
-		return 1;
-	else
-		return 0;
-# else
+#else
 	return 0;
-# endif
 #endif
 }
 
-void FLAC__cpu_info_x86(FLAC__uint32 level, FLAC__uint32 *eax, FLAC__uint32 *ebx, FLAC__uint32 *ecx, FLAC__uint32 *edx)
+static void
+cpuinfo_x86(FLAC__uint32 level, FLAC__uint32 *eax, FLAC__uint32 *ebx, FLAC__uint32 *ecx, FLAC__uint32 *edx)
 {
-#if defined _MSC_VER || defined __INTEL_COMPILER
+#if defined _MSC_VER
 	int cpuinfo[4];
 	int ext = level & 0x80000000;
 	__cpuid(cpuinfo, ext);
-	if((unsigned)cpuinfo[0] < level) {
-		*eax = *ebx = *ecx = *edx = 0;
+	if ((uint32_t)cpuinfo[0] >= level) {
+#if FLAC__AVX_SUPPORTED
+		__cpuidex(cpuinfo, level, 0); /* for AVX2 detection */
+#else
+		__cpuid(cpuinfo, level); /* some old compilers don't support __cpuidex */
+#endif
+		*eax = cpuinfo[0]; *ebx = cpuinfo[1]; *ecx = cpuinfo[2]; *edx = cpuinfo[3];
 		return;
 	}
-#if defined FLAC__AVX_SUPPORTED
-	__cpuidex(cpuinfo, level, 0); /* for AVX2 detection */
-#else
-	__cpuid(cpuinfo, level); /* some old compilers don't support __cpuidex */
-#endif
-	*eax = cpuinfo[0]; *ebx = cpuinfo[1]; *ecx = cpuinfo[2]; *edx = cpuinfo[3];
 #elif defined __GNUC__ && defined HAVE_CPUID_H
 	FLAC__uint32 ext = level & 0x80000000;
 	__cpuid(ext, *eax, *ebx, *ecx, *edx);
-	if (*eax < level) {
-		*eax = *ebx = *ecx = *edx = 0;
+	if (*eax >= level) {
+		__cpuid_count(level, 0, *eax, *ebx, *ecx, *edx);
 		return;
 	}
-	__cpuid_count(level, 0, *eax, *ebx, *ecx, *edx);
-#else
+#endif
 	*eax = *ebx = *ecx = *edx = 0;
-#endif
 }
 
-FLAC__uint32 FLAC__cpu_xgetbv_x86(void)
+#endif
+
+static void
+x86_cpu_info (FLAC__CPUInfo *info)
 {
-#if (defined _MSC_VER || defined __INTEL_COMPILER) && defined FLAC__AVX_SUPPORTED
-	return (FLAC__uint32)_xgetbv(0);
-#elif defined __GNUC__
-	FLAC__uint32 lo, hi;
-	asm volatile (".byte 0x0f, 0x01, 0xd0" : "=a"(lo), "=d"(hi) : "c" (0));
-	return lo;
+#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN && !defined FLAC__NO_ASM
+	FLAC__bool x86_osxsave = false;
+	FLAC__bool os_avx = false;
+	FLAC__uint32 flags_eax, flags_ebx, flags_ecx, flags_edx;
+
+	info->use_asm = true; /* we assume a minimum of 80386 */
+	if (!cpu_have_cpuid())
+		return;
+
+	cpuinfo_x86(0, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx);
+	info->x86.intel = (flags_ebx == 0x756E6547 && flags_edx == 0x49656E69 && flags_ecx == 0x6C65746E) ? true : false; /* GenuineIntel */
+	cpuinfo_x86(1, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx);
+
+	info->x86.cmov  = (flags_edx & FLAC__CPUINFO_X86_CPUID_CMOV ) ? true : false;
+	info->x86.mmx   = (flags_edx & FLAC__CPUINFO_X86_CPUID_MMX  ) ? true : false;
+	info->x86.sse   = (flags_edx & FLAC__CPUINFO_X86_CPUID_SSE  ) ? true : false;
+	info->x86.sse2  = (flags_edx & FLAC__CPUINFO_X86_CPUID_SSE2 ) ? true : false;
+	info->x86.sse3  = (flags_ecx & FLAC__CPUINFO_X86_CPUID_SSE3 ) ? true : false;
+	info->x86.ssse3 = (flags_ecx & FLAC__CPUINFO_X86_CPUID_SSSE3) ? true : false;
+	info->x86.sse41 = (flags_ecx & FLAC__CPUINFO_X86_CPUID_SSE41) ? true : false;
+	info->x86.sse42 = (flags_ecx & FLAC__CPUINFO_X86_CPUID_SSE42) ? true : false;
+
+	if (FLAC__AVX_SUPPORTED) {
+		x86_osxsave     = (flags_ecx & FLAC__CPUINFO_X86_CPUID_OSXSAVE) ? true : false;
+		info->x86.avx   = (flags_ecx & FLAC__CPUINFO_X86_CPUID_AVX    ) ? true : false;
+		info->x86.fma   = (flags_ecx & FLAC__CPUINFO_X86_CPUID_FMA    ) ? true : false;
+		cpuinfo_x86(7, &flags_eax, &flags_ebx, &flags_ecx, &flags_edx);
+		info->x86.avx2  = (flags_ebx & FLAC__CPUINFO_X86_CPUID_AVX2   ) ? true : false;
+	}
+
+#if defined FLAC__CPU_IA32
+	dfprintf(stderr, "CPU info (IA-32):\n");
 #else
-	return 0;
+	dfprintf(stderr, "CPU info (x86-64):\n");
+#endif
+	dfprintf(stderr, "  CMOV ....... %c\n", info->x86.cmov    ? 'Y' : 'n');
+	dfprintf(stderr, "  MMX ........ %c\n", info->x86.mmx     ? 'Y' : 'n');
+	dfprintf(stderr, "  SSE ........ %c\n", info->x86.sse     ? 'Y' : 'n');
+	dfprintf(stderr, "  SSE2 ....... %c\n", info->x86.sse2    ? 'Y' : 'n');
+	dfprintf(stderr, "  SSE3 ....... %c\n", info->x86.sse3    ? 'Y' : 'n');
+	dfprintf(stderr, "  SSSE3 ...... %c\n", info->x86.ssse3   ? 'Y' : 'n');
+	dfprintf(stderr, "  SSE41 ...... %c\n", info->x86.sse41   ? 'Y' : 'n');
+	dfprintf(stderr, "  SSE42 ...... %c\n", info->x86.sse42   ? 'Y' : 'n');
+
+	if (FLAC__AVX_SUPPORTED) {
+		dfprintf(stderr, "  AVX ........ %c\n", info->x86.avx     ? 'Y' : 'n');
+		dfprintf(stderr, "  FMA ........ %c\n", info->x86.fma     ? 'Y' : 'n');
+		dfprintf(stderr, "  AVX2 ....... %c\n", info->x86.avx2    ? 'Y' : 'n');
+	}
+
+	/*
+	 * now have to check for OS support of AVX instructions
+	 */
+	if (FLAC__AVX_SUPPORTED && info->x86.avx && x86_osxsave && (cpu_xgetbv_x86() & 0x6) == 0x6) {
+		os_avx = true;
+	}
+	if (os_avx) {
+		dfprintf(stderr, "  AVX OS sup . %c\n", info->x86.avx ? 'Y' : 'n');
+	}
+	if (!os_avx) {
+		/* no OS AVX support */
+		info->x86.avx     = false;
+		info->x86.avx2    = false;
+		info->x86.fma     = false;
+	}
+#else
+	info->use_asm = false;
 #endif
 }
 
-#endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */
+static void
+ppc_cpu_info (FLAC__CPUInfo *info)
+{
+#if defined FLAC__CPU_PPC
+#ifndef PPC_FEATURE2_ARCH_3_00
+#define PPC_FEATURE2_ARCH_3_00		0x00800000
+#endif
+
+#ifndef PPC_FEATURE2_ARCH_2_07
+#define PPC_FEATURE2_ARCH_2_07		0x80000000
+#endif
+
+#if defined (__linux__) && defined(HAVE_GETAUXVAL)
+	if (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_3_00) {
+		info->ppc.arch_3_00 = true;
+	} else if (getauxval(AT_HWCAP2) & PPC_FEATURE2_ARCH_2_07) {
+		info->ppc.arch_2_07 = true;
+	}
+#elif defined(__FreeBSD__) && defined(HAVE_SYS_AUXV_H)
+	unsigned long hwcaps;
+	elf_aux_info(AT_HWCAP2, &hwcaps, sizeof(hwcaps));
+	if (hwcaps & PPC_FEATURE2_ARCH_3_00) {
+		info->ppc.arch_3_00 = true;
+	} else if (hwcaps & PPC_FEATURE2_ARCH_2_07) {
+		info->ppc.arch_2_07 = true;
+	}
+#elif defined(__APPLE__)
+	/* no Mac OS X version supports CPU with Power ISA v2.07 or better */
+	info->ppc.arch_2_07 = false;
+	info->ppc.arch_3_00 = false;
+#else
+	info->ppc.arch_2_07 = false;
+	info->ppc.arch_3_00 = false;
+#endif
+
+#else
+	info->ppc.arch_2_07 = false;
+	info->ppc.arch_3_00 = false;
+#endif
+}
+
+void FLAC__cpu_info (FLAC__CPUInfo *info)
+{
+	memset(info, 0, sizeof(*info));
+
+#ifdef FLAC__CPU_IA32
+	info->type = FLAC__CPUINFO_TYPE_IA32;
+#elif defined FLAC__CPU_X86_64
+	info->type = FLAC__CPUINFO_TYPE_X86_64;
+#elif defined FLAC__CPU_PPC
+	info->type = FLAC__CPUINFO_TYPE_PPC;
+#else
+	info->type = FLAC__CPUINFO_TYPE_UNKNOWN;
+#endif
+
+	switch (info->type) {
+	case FLAC__CPUINFO_TYPE_IA32: /* fallthrough */
+	case FLAC__CPUINFO_TYPE_X86_64:
+		x86_cpu_info (info);
+		break;
+	case FLAC__CPUINFO_TYPE_PPC:
+		ppc_cpu_info (info);
+		break;
+	default:
+		info->use_asm = false;
+		break;
+	}
+}

diff --git a/src/libFLAC/crc.c b/src/libFLAC/crc.c
index de2cb18..4f47e98 100644
--- a/src/libFLAC/crc.c
+++ b/src/libFLAC/crc.c

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -38,7 +38,7 @@
 
 /* CRC-8, poly = x^8 + x^2 + x^1 + x^0, init = 0 */
 
-FLAC__byte const FLAC__crc8_table[256] = {
+FLAC__uint8 const FLAC__crc8_table[256] = {
 	0x00, 0x07, 0x0E, 0x09, 0x1C, 0x1B, 0x12, 0x15,
 	0x38, 0x3F, 0x36, 0x31, 0x24, 0x23, 0x2A, 0x2D,
 	0x70, 0x77, 0x7E, 0x79, 0x6C, 0x6B, 0x62, 0x65,
@@ -75,8 +75,8 @@
 
 /* CRC-16, poly = x^16 + x^15 + x^2 + x^0, init = 0 */
 
-unsigned const FLAC__crc16_table[256] = {
-	0x0000,  0x8005,  0x800f,  0x000a,  0x801b,  0x001e,  0x0014,  0x8011,
+FLAC__uint16 const FLAC__crc16_table[8][256] = {
+  { 0x0000,  0x8005,  0x800f,  0x000a,  0x801b,  0x001e,  0x0014,  0x8011,
 	0x8033,  0x0036,  0x003c,  0x8039,  0x0028,  0x802d,  0x8027,  0x0022,
 	0x8063,  0x0066,  0x006c,  0x8069,  0x0078,  0x807d,  0x8077,  0x0072,
 	0x0050,  0x8055,  0x805f,  0x005a,  0x804b,  0x004e,  0x0044,  0x8041,
@@ -107,22 +107,263 @@
 	0x8243,  0x0246,  0x024c,  0x8249,  0x0258,  0x825d,  0x8257,  0x0252,
 	0x0270,  0x8275,  0x827f,  0x027a,  0x826b,  0x026e,  0x0264,  0x8261,
 	0x0220,  0x8225,  0x822f,  0x022a,  0x823b,  0x023e,  0x0234,  0x8231,
-	0x8213,  0x0216,  0x021c,  0x8219,  0x0208,  0x820d,  0x8207,  0x0202
+	0x8213,  0x0216,  0x021c,  0x8219,  0x0208,  0x820d,  0x8207,  0x0202 },
+
+  { 0x0000,  0x8603,  0x8c03,  0x0a00,  0x9803,  0x1e00,  0x1400,  0x9203,
+	0xb003,  0x3600,  0x3c00,  0xba03,  0x2800,  0xae03,  0xa403,  0x2200,
+	0xe003,  0x6600,  0x6c00,  0xea03,  0x7800,  0xfe03,  0xf403,  0x7200,
+	0x5000,  0xd603,  0xdc03,  0x5a00,  0xc803,  0x4e00,  0x4400,  0xc203,
+	0x4003,  0xc600,  0xcc00,  0x4a03,  0xd800,  0x5e03,  0x5403,  0xd200,
+	0xf000,  0x7603,  0x7c03,  0xfa00,  0x6803,  0xee00,  0xe400,  0x6203,
+	0xa000,  0x2603,  0x2c03,  0xaa00,  0x3803,  0xbe00,  0xb400,  0x3203,
+	0x1003,  0x9600,  0x9c00,  0x1a03,  0x8800,  0x0e03,  0x0403,  0x8200,
+	0x8006,  0x0605,  0x0c05,  0x8a06,  0x1805,  0x9e06,  0x9406,  0x1205,
+	0x3005,  0xb606,  0xbc06,  0x3a05,  0xa806,  0x2e05,  0x2405,  0xa206,
+	0x6005,  0xe606,  0xec06,  0x6a05,  0xf806,  0x7e05,  0x7405,  0xf206,
+	0xd006,  0x5605,  0x5c05,  0xda06,  0x4805,  0xce06,  0xc406,  0x4205,
+	0xc005,  0x4606,  0x4c06,  0xca05,  0x5806,  0xde05,  0xd405,  0x5206,
+	0x7006,  0xf605,  0xfc05,  0x7a06,  0xe805,  0x6e06,  0x6406,  0xe205,
+	0x2006,  0xa605,  0xac05,  0x2a06,  0xb805,  0x3e06,  0x3406,  0xb205,
+	0x9005,  0x1606,  0x1c06,  0x9a05,  0x0806,  0x8e05,  0x8405,  0x0206,
+	0x8009,  0x060a,  0x0c0a,  0x8a09,  0x180a,  0x9e09,  0x9409,  0x120a,
+	0x300a,  0xb609,  0xbc09,  0x3a0a,  0xa809,  0x2e0a,  0x240a,  0xa209,
+	0x600a,  0xe609,  0xec09,  0x6a0a,  0xf809,  0x7e0a,  0x740a,  0xf209,
+	0xd009,  0x560a,  0x5c0a,  0xda09,  0x480a,  0xce09,  0xc409,  0x420a,
+	0xc00a,  0x4609,  0x4c09,  0xca0a,  0x5809,  0xde0a,  0xd40a,  0x5209,
+	0x7009,  0xf60a,  0xfc0a,  0x7a09,  0xe80a,  0x6e09,  0x6409,  0xe20a,
+	0x2009,  0xa60a,  0xac0a,  0x2a09,  0xb80a,  0x3e09,  0x3409,  0xb20a,
+	0x900a,  0x1609,  0x1c09,  0x9a0a,  0x0809,  0x8e0a,  0x840a,  0x0209,
+	0x000f,  0x860c,  0x8c0c,  0x0a0f,  0x980c,  0x1e0f,  0x140f,  0x920c,
+	0xb00c,  0x360f,  0x3c0f,  0xba0c,  0x280f,  0xae0c,  0xa40c,  0x220f,
+	0xe00c,  0x660f,  0x6c0f,  0xea0c,  0x780f,  0xfe0c,  0xf40c,  0x720f,
+	0x500f,  0xd60c,  0xdc0c,  0x5a0f,  0xc80c,  0x4e0f,  0x440f,  0xc20c,
+	0x400c,  0xc60f,  0xcc0f,  0x4a0c,  0xd80f,  0x5e0c,  0x540c,  0xd20f,
+	0xf00f,  0x760c,  0x7c0c,  0xfa0f,  0x680c,  0xee0f,  0xe40f,  0x620c,
+	0xa00f,  0x260c,  0x2c0c,  0xaa0f,  0x380c,  0xbe0f,  0xb40f,  0x320c,
+	0x100c,  0x960f,  0x9c0f,  0x1a0c,  0x880f,  0x0e0c,  0x040c,  0x820f },
+
+  { 0x0000,  0x8017,  0x802b,  0x003c,  0x8053,  0x0044,  0x0078,  0x806f,
+	0x80a3,  0x00b4,  0x0088,  0x809f,  0x00f0,  0x80e7,  0x80db,  0x00cc,
+	0x8143,  0x0154,  0x0168,  0x817f,  0x0110,  0x8107,  0x813b,  0x012c,
+	0x01e0,  0x81f7,  0x81cb,  0x01dc,  0x81b3,  0x01a4,  0x0198,  0x818f,
+	0x8283,  0x0294,  0x02a8,  0x82bf,  0x02d0,  0x82c7,  0x82fb,  0x02ec,
+	0x0220,  0x8237,  0x820b,  0x021c,  0x8273,  0x0264,  0x0258,  0x824f,
+	0x03c0,  0x83d7,  0x83eb,  0x03fc,  0x8393,  0x0384,  0x03b8,  0x83af,
+	0x8363,  0x0374,  0x0348,  0x835f,  0x0330,  0x8327,  0x831b,  0x030c,
+	0x8503,  0x0514,  0x0528,  0x853f,  0x0550,  0x8547,  0x857b,  0x056c,
+	0x05a0,  0x85b7,  0x858b,  0x059c,  0x85f3,  0x05e4,  0x05d8,  0x85cf,
+	0x0440,  0x8457,  0x846b,  0x047c,  0x8413,  0x0404,  0x0438,  0x842f,
+	0x84e3,  0x04f4,  0x04c8,  0x84df,  0x04b0,  0x84a7,  0x849b,  0x048c,
+	0x0780,  0x8797,  0x87ab,  0x07bc,  0x87d3,  0x07c4,  0x07f8,  0x87ef,
+	0x8723,  0x0734,  0x0708,  0x871f,  0x0770,  0x8767,  0x875b,  0x074c,
+	0x86c3,  0x06d4,  0x06e8,  0x86ff,  0x0690,  0x8687,  0x86bb,  0x06ac,
+	0x0660,  0x8677,  0x864b,  0x065c,  0x8633,  0x0624,  0x0618,  0x860f,
+	0x8a03,  0x0a14,  0x0a28,  0x8a3f,  0x0a50,  0x8a47,  0x8a7b,  0x0a6c,
+	0x0aa0,  0x8ab7,  0x8a8b,  0x0a9c,  0x8af3,  0x0ae4,  0x0ad8,  0x8acf,
+	0x0b40,  0x8b57,  0x8b6b,  0x0b7c,  0x8b13,  0x0b04,  0x0b38,  0x8b2f,
+	0x8be3,  0x0bf4,  0x0bc8,  0x8bdf,  0x0bb0,  0x8ba7,  0x8b9b,  0x0b8c,
+	0x0880,  0x8897,  0x88ab,  0x08bc,  0x88d3,  0x08c4,  0x08f8,  0x88ef,
+	0x8823,  0x0834,  0x0808,  0x881f,  0x0870,  0x8867,  0x885b,  0x084c,
+	0x89c3,  0x09d4,  0x09e8,  0x89ff,  0x0990,  0x8987,  0x89bb,  0x09ac,
+	0x0960,  0x8977,  0x894b,  0x095c,  0x8933,  0x0924,  0x0918,  0x890f,
+	0x0f00,  0x8f17,  0x8f2b,  0x0f3c,  0x8f53,  0x0f44,  0x0f78,  0x8f6f,
+	0x8fa3,  0x0fb4,  0x0f88,  0x8f9f,  0x0ff0,  0x8fe7,  0x8fdb,  0x0fcc,
+	0x8e43,  0x0e54,  0x0e68,  0x8e7f,  0x0e10,  0x8e07,  0x8e3b,  0x0e2c,
+	0x0ee0,  0x8ef7,  0x8ecb,  0x0edc,  0x8eb3,  0x0ea4,  0x0e98,  0x8e8f,
+	0x8d83,  0x0d94,  0x0da8,  0x8dbf,  0x0dd0,  0x8dc7,  0x8dfb,  0x0dec,
+	0x0d20,  0x8d37,  0x8d0b,  0x0d1c,  0x8d73,  0x0d64,  0x0d58,  0x8d4f,
+	0x0cc0,  0x8cd7,  0x8ceb,  0x0cfc,  0x8c93,  0x0c84,  0x0cb8,  0x8caf,
+	0x8c63,  0x0c74,  0x0c48,  0x8c5f,  0x0c30,  0x8c27,  0x8c1b,  0x0c0c },
+
+  { 0x0000,  0x9403,  0xa803,  0x3c00,  0xd003,  0x4400,  0x7800,  0xec03,
+	0x2003,  0xb400,  0x8800,  0x1c03,  0xf000,  0x6403,  0x5803,  0xcc00,
+	0x4006,  0xd405,  0xe805,  0x7c06,  0x9005,  0x0406,  0x3806,  0xac05,
+	0x6005,  0xf406,  0xc806,  0x5c05,  0xb006,  0x2405,  0x1805,  0x8c06,
+	0x800c,  0x140f,  0x280f,  0xbc0c,  0x500f,  0xc40c,  0xf80c,  0x6c0f,
+	0xa00f,  0x340c,  0x080c,  0x9c0f,  0x700c,  0xe40f,  0xd80f,  0x4c0c,
+	0xc00a,  0x5409,  0x6809,  0xfc0a,  0x1009,  0x840a,  0xb80a,  0x2c09,
+	0xe009,  0x740a,  0x480a,  0xdc09,  0x300a,  0xa409,  0x9809,  0x0c0a,
+	0x801d,  0x141e,  0x281e,  0xbc1d,  0x501e,  0xc41d,  0xf81d,  0x6c1e,
+	0xa01e,  0x341d,  0x081d,  0x9c1e,  0x701d,  0xe41e,  0xd81e,  0x4c1d,
+	0xc01b,  0x5418,  0x6818,  0xfc1b,  0x1018,  0x841b,  0xb81b,  0x2c18,
+	0xe018,  0x741b,  0x481b,  0xdc18,  0x301b,  0xa418,  0x9818,  0x0c1b,
+	0x0011,  0x9412,  0xa812,  0x3c11,  0xd012,  0x4411,  0x7811,  0xec12,
+	0x2012,  0xb411,  0x8811,  0x1c12,  0xf011,  0x6412,  0x5812,  0xcc11,
+	0x4017,  0xd414,  0xe814,  0x7c17,  0x9014,  0x0417,  0x3817,  0xac14,
+	0x6014,  0xf417,  0xc817,  0x5c14,  0xb017,  0x2414,  0x1814,  0x8c17,
+	0x803f,  0x143c,  0x283c,  0xbc3f,  0x503c,  0xc43f,  0xf83f,  0x6c3c,
+	0xa03c,  0x343f,  0x083f,  0x9c3c,  0x703f,  0xe43c,  0xd83c,  0x4c3f,
+	0xc039,  0x543a,  0x683a,  0xfc39,  0x103a,  0x8439,  0xb839,  0x2c3a,
+	0xe03a,  0x7439,  0x4839,  0xdc3a,  0x3039,  0xa43a,  0x983a,  0x0c39,
+	0x0033,  0x9430,  0xa830,  0x3c33,  0xd030,  0x4433,  0x7833,  0xec30,
+	0x2030,  0xb433,  0x8833,  0x1c30,  0xf033,  0x6430,  0x5830,  0xcc33,
+	0x4035,  0xd436,  0xe836,  0x7c35,  0x9036,  0x0435,  0x3835,  0xac36,
+	0x6036,  0xf435,  0xc835,  0x5c36,  0xb035,  0x2436,  0x1836,  0x8c35,
+	0x0022,  0x9421,  0xa821,  0x3c22,  0xd021,  0x4422,  0x7822,  0xec21,
+	0x2021,  0xb422,  0x8822,  0x1c21,  0xf022,  0x6421,  0x5821,  0xcc22,
+	0x4024,  0xd427,  0xe827,  0x7c24,  0x9027,  0x0424,  0x3824,  0xac27,
+	0x6027,  0xf424,  0xc824,  0x5c27,  0xb024,  0x2427,  0x1827,  0x8c24,
+	0x802e,  0x142d,  0x282d,  0xbc2e,  0x502d,  0xc42e,  0xf82e,  0x6c2d,
+	0xa02d,  0x342e,  0x082e,  0x9c2d,  0x702e,  0xe42d,  0xd82d,  0x4c2e,
+	0xc028,  0x542b,  0x682b,  0xfc28,  0x102b,  0x8428,  0xb828,  0x2c2b,
+	0xe02b,  0x7428,  0x4828,  0xdc2b,  0x3028,  0xa42b,  0x982b,  0x0c28 },
+
+  { 0x0000,  0x807b,  0x80f3,  0x0088,  0x81e3,  0x0198,  0x0110,  0x816b,
+	0x83c3,  0x03b8,  0x0330,  0x834b,  0x0220,  0x825b,  0x82d3,  0x02a8,
+	0x8783,  0x07f8,  0x0770,  0x870b,  0x0660,  0x861b,  0x8693,  0x06e8,
+	0x0440,  0x843b,  0x84b3,  0x04c8,  0x85a3,  0x05d8,  0x0550,  0x852b,
+	0x8f03,  0x0f78,  0x0ff0,  0x8f8b,  0x0ee0,  0x8e9b,  0x8e13,  0x0e68,
+	0x0cc0,  0x8cbb,  0x8c33,  0x0c48,  0x8d23,  0x0d58,  0x0dd0,  0x8dab,
+	0x0880,  0x88fb,  0x8873,  0x0808,  0x8963,  0x0918,  0x0990,  0x89eb,
+	0x8b43,  0x0b38,  0x0bb0,  0x8bcb,  0x0aa0,  0x8adb,  0x8a53,  0x0a28,
+	0x9e03,  0x1e78,  0x1ef0,  0x9e8b,  0x1fe0,  0x9f9b,  0x9f13,  0x1f68,
+	0x1dc0,  0x9dbb,  0x9d33,  0x1d48,  0x9c23,  0x1c58,  0x1cd0,  0x9cab,
+	0x1980,  0x99fb,  0x9973,  0x1908,  0x9863,  0x1818,  0x1890,  0x98eb,
+	0x9a43,  0x1a38,  0x1ab0,  0x9acb,  0x1ba0,  0x9bdb,  0x9b53,  0x1b28,
+	0x1100,  0x917b,  0x91f3,  0x1188,  0x90e3,  0x1098,  0x1010,  0x906b,
+	0x92c3,  0x12b8,  0x1230,  0x924b,  0x1320,  0x935b,  0x93d3,  0x13a8,
+	0x9683,  0x16f8,  0x1670,  0x960b,  0x1760,  0x971b,  0x9793,  0x17e8,
+	0x1540,  0x953b,  0x95b3,  0x15c8,  0x94a3,  0x14d8,  0x1450,  0x942b,
+	0xbc03,  0x3c78,  0x3cf0,  0xbc8b,  0x3de0,  0xbd9b,  0xbd13,  0x3d68,
+	0x3fc0,  0xbfbb,  0xbf33,  0x3f48,  0xbe23,  0x3e58,  0x3ed0,  0xbeab,
+	0x3b80,  0xbbfb,  0xbb73,  0x3b08,  0xba63,  0x3a18,  0x3a90,  0xbaeb,
+	0xb843,  0x3838,  0x38b0,  0xb8cb,  0x39a0,  0xb9db,  0xb953,  0x3928,
+	0x3300,  0xb37b,  0xb3f3,  0x3388,  0xb2e3,  0x3298,  0x3210,  0xb26b,
+	0xb0c3,  0x30b8,  0x3030,  0xb04b,  0x3120,  0xb15b,  0xb1d3,  0x31a8,
+	0xb483,  0x34f8,  0x3470,  0xb40b,  0x3560,  0xb51b,  0xb593,  0x35e8,
+	0x3740,  0xb73b,  0xb7b3,  0x37c8,  0xb6a3,  0x36d8,  0x3650,  0xb62b,
+	0x2200,  0xa27b,  0xa2f3,  0x2288,  0xa3e3,  0x2398,  0x2310,  0xa36b,
+	0xa1c3,  0x21b8,  0x2130,  0xa14b,  0x2020,  0xa05b,  0xa0d3,  0x20a8,
+	0xa583,  0x25f8,  0x2570,  0xa50b,  0x2460,  0xa41b,  0xa493,  0x24e8,
+	0x2640,  0xa63b,  0xa6b3,  0x26c8,  0xa7a3,  0x27d8,  0x2750,  0xa72b,
+	0xad03,  0x2d78,  0x2df0,  0xad8b,  0x2ce0,  0xac9b,  0xac13,  0x2c68,
+	0x2ec0,  0xaebb,  0xae33,  0x2e48,  0xaf23,  0x2f58,  0x2fd0,  0xafab,
+	0x2a80,  0xaafb,  0xaa73,  0x2a08,  0xab63,  0x2b18,  0x2b90,  0xabeb,
+	0xa943,  0x2938,  0x29b0,  0xa9cb,  0x28a0,  0xa8db,  0xa853,  0x2828 },
+
+  { 0x0000,  0xf803,  0x7003,  0x8800,  0xe006,  0x1805,  0x9005,  0x6806,
+	0x4009,  0xb80a,  0x300a,  0xc809,  0xa00f,  0x580c,  0xd00c,  0x280f,
+	0x8012,  0x7811,  0xf011,  0x0812,  0x6014,  0x9817,  0x1017,  0xe814,
+	0xc01b,  0x3818,  0xb018,  0x481b,  0x201d,  0xd81e,  0x501e,  0xa81d,
+	0x8021,  0x7822,  0xf022,  0x0821,  0x6027,  0x9824,  0x1024,  0xe827,
+	0xc028,  0x382b,  0xb02b,  0x4828,  0x202e,  0xd82d,  0x502d,  0xa82e,
+	0x0033,  0xf830,  0x7030,  0x8833,  0xe035,  0x1836,  0x9036,  0x6835,
+	0x403a,  0xb839,  0x3039,  0xc83a,  0xa03c,  0x583f,  0xd03f,  0x283c,
+	0x8047,  0x7844,  0xf044,  0x0847,  0x6041,  0x9842,  0x1042,  0xe841,
+	0xc04e,  0x384d,  0xb04d,  0x484e,  0x2048,  0xd84b,  0x504b,  0xa848,
+	0x0055,  0xf856,  0x7056,  0x8855,  0xe053,  0x1850,  0x9050,  0x6853,
+	0x405c,  0xb85f,  0x305f,  0xc85c,  0xa05a,  0x5859,  0xd059,  0x285a,
+	0x0066,  0xf865,  0x7065,  0x8866,  0xe060,  0x1863,  0x9063,  0x6860,
+	0x406f,  0xb86c,  0x306c,  0xc86f,  0xa069,  0x586a,  0xd06a,  0x2869,
+	0x8074,  0x7877,  0xf077,  0x0874,  0x6072,  0x9871,  0x1071,  0xe872,
+	0xc07d,  0x387e,  0xb07e,  0x487d,  0x207b,  0xd878,  0x5078,  0xa87b,
+	0x808b,  0x7888,  0xf088,  0x088b,  0x608d,  0x988e,  0x108e,  0xe88d,
+	0xc082,  0x3881,  0xb081,  0x4882,  0x2084,  0xd887,  0x5087,  0xa884,
+	0x0099,  0xf89a,  0x709a,  0x8899,  0xe09f,  0x189c,  0x909c,  0x689f,
+	0x4090,  0xb893,  0x3093,  0xc890,  0xa096,  0x5895,  0xd095,  0x2896,
+	0x00aa,  0xf8a9,  0x70a9,  0x88aa,  0xe0ac,  0x18af,  0x90af,  0x68ac,
+	0x40a3,  0xb8a0,  0x30a0,  0xc8a3,  0xa0a5,  0x58a6,  0xd0a6,  0x28a5,
+	0x80b8,  0x78bb,  0xf0bb,  0x08b8,  0x60be,  0x98bd,  0x10bd,  0xe8be,
+	0xc0b1,  0x38b2,  0xb0b2,  0x48b1,  0x20b7,  0xd8b4,  0x50b4,  0xa8b7,
+	0x00cc,  0xf8cf,  0x70cf,  0x88cc,  0xe0ca,  0x18c9,  0x90c9,  0x68ca,
+	0x40c5,  0xb8c6,  0x30c6,  0xc8c5,  0xa0c3,  0x58c0,  0xd0c0,  0x28c3,
+	0x80de,  0x78dd,  0xf0dd,  0x08de,  0x60d8,  0x98db,  0x10db,  0xe8d8,
+	0xc0d7,  0x38d4,  0xb0d4,  0x48d7,  0x20d1,  0xd8d2,  0x50d2,  0xa8d1,
+	0x80ed,  0x78ee,  0xf0ee,  0x08ed,  0x60eb,  0x98e8,  0x10e8,  0xe8eb,
+	0xc0e4,  0x38e7,  0xb0e7,  0x48e4,  0x20e2,  0xd8e1,  0x50e1,  0xa8e2,
+	0x00ff,  0xf8fc,  0x70fc,  0x88ff,  0xe0f9,  0x18fa,  0x90fa,  0x68f9,
+	0x40f6,  0xb8f5,  0x30f5,  0xc8f6,  0xa0f0,  0x58f3,  0xd0f3,  0x28f0 },
+
+  { 0x0000,  0x8113,  0x8223,  0x0330,  0x8443,  0x0550,  0x0660,  0x8773,
+	0x8883,  0x0990,  0x0aa0,  0x8bb3,  0x0cc0,  0x8dd3,  0x8ee3,  0x0ff0,
+	0x9103,  0x1010,  0x1320,  0x9233,  0x1540,  0x9453,  0x9763,  0x1670,
+	0x1980,  0x9893,  0x9ba3,  0x1ab0,  0x9dc3,  0x1cd0,  0x1fe0,  0x9ef3,
+	0xa203,  0x2310,  0x2020,  0xa133,  0x2640,  0xa753,  0xa463,  0x2570,
+	0x2a80,  0xab93,  0xa8a3,  0x29b0,  0xaec3,  0x2fd0,  0x2ce0,  0xadf3,
+	0x3300,  0xb213,  0xb123,  0x3030,  0xb743,  0x3650,  0x3560,  0xb473,
+	0xbb83,  0x3a90,  0x39a0,  0xb8b3,  0x3fc0,  0xbed3,  0xbde3,  0x3cf0,
+	0xc403,  0x4510,  0x4620,  0xc733,  0x4040,  0xc153,  0xc263,  0x4370,
+	0x4c80,  0xcd93,  0xcea3,  0x4fb0,  0xc8c3,  0x49d0,  0x4ae0,  0xcbf3,
+	0x5500,  0xd413,  0xd723,  0x5630,  0xd143,  0x5050,  0x5360,  0xd273,
+	0xdd83,  0x5c90,  0x5fa0,  0xdeb3,  0x59c0,  0xd8d3,  0xdbe3,  0x5af0,
+	0x6600,  0xe713,  0xe423,  0x6530,  0xe243,  0x6350,  0x6060,  0xe173,
+	0xee83,  0x6f90,  0x6ca0,  0xedb3,  0x6ac0,  0xebd3,  0xe8e3,  0x69f0,
+	0xf703,  0x7610,  0x7520,  0xf433,  0x7340,  0xf253,  0xf163,  0x7070,
+	0x7f80,  0xfe93,  0xfda3,  0x7cb0,  0xfbc3,  0x7ad0,  0x79e0,  0xf8f3,
+	0x0803,  0x8910,  0x8a20,  0x0b33,  0x8c40,  0x0d53,  0x0e63,  0x8f70,
+	0x8080,  0x0193,  0x02a3,  0x83b0,  0x04c3,  0x85d0,  0x86e0,  0x07f3,
+	0x9900,  0x1813,  0x1b23,  0x9a30,  0x1d43,  0x9c50,  0x9f60,  0x1e73,
+	0x1183,  0x9090,  0x93a0,  0x12b3,  0x95c0,  0x14d3,  0x17e3,  0x96f0,
+	0xaa00,  0x2b13,  0x2823,  0xa930,  0x2e43,  0xaf50,  0xac60,  0x2d73,
+	0x2283,  0xa390,  0xa0a0,  0x21b3,  0xa6c0,  0x27d3,  0x24e3,  0xa5f0,
+	0x3b03,  0xba10,  0xb920,  0x3833,  0xbf40,  0x3e53,  0x3d63,  0xbc70,
+	0xb380,  0x3293,  0x31a3,  0xb0b0,  0x37c3,  0xb6d0,  0xb5e0,  0x34f3,
+	0xcc00,  0x4d13,  0x4e23,  0xcf30,  0x4843,  0xc950,  0xca60,  0x4b73,
+	0x4483,  0xc590,  0xc6a0,  0x47b3,  0xc0c0,  0x41d3,  0x42e3,  0xc3f0,
+	0x5d03,  0xdc10,  0xdf20,  0x5e33,  0xd940,  0x5853,  0x5b63,  0xda70,
+	0xd580,  0x5493,  0x57a3,  0xd6b0,  0x51c3,  0xd0d0,  0xd3e0,  0x52f3,
+	0x6e03,  0xef10,  0xec20,  0x6d33,  0xea40,  0x6b53,  0x6863,  0xe970,
+	0xe680,  0x6793,  0x64a3,  0xe5b0,  0x62c3,  0xe3d0,  0xe0e0,  0x61f3,
+	0xff00,  0x7e13,  0x7d23,  0xfc30,  0x7b43,  0xfa50,  0xf960,  0x7873,
+	0x7783,  0xf690,  0xf5a0,  0x74b3,  0xf3c0,  0x72d3,  0x71e3,  0xf0f0 },
+
+  { 0x0000,  0x1006,  0x200c,  0x300a,  0x4018,  0x501e,  0x6014,  0x7012,
+	0x8030,  0x9036,  0xa03c,  0xb03a,  0xc028,  0xd02e,  0xe024,  0xf022,
+	0x8065,  0x9063,  0xa069,  0xb06f,  0xc07d,  0xd07b,  0xe071,  0xf077,
+	0x0055,  0x1053,  0x2059,  0x305f,  0x404d,  0x504b,  0x6041,  0x7047,
+	0x80cf,  0x90c9,  0xa0c3,  0xb0c5,  0xc0d7,  0xd0d1,  0xe0db,  0xf0dd,
+	0x00ff,  0x10f9,  0x20f3,  0x30f5,  0x40e7,  0x50e1,  0x60eb,  0x70ed,
+	0x00aa,  0x10ac,  0x20a6,  0x30a0,  0x40b2,  0x50b4,  0x60be,  0x70b8,
+	0x809a,  0x909c,  0xa096,  0xb090,  0xc082,  0xd084,  0xe08e,  0xf088,
+	0x819b,  0x919d,  0xa197,  0xb191,  0xc183,  0xd185,  0xe18f,  0xf189,
+	0x01ab,  0x11ad,  0x21a7,  0x31a1,  0x41b3,  0x51b5,  0x61bf,  0x71b9,
+	0x01fe,  0x11f8,  0x21f2,  0x31f4,  0x41e6,  0x51e0,  0x61ea,  0x71ec,
+	0x81ce,  0x91c8,  0xa1c2,  0xb1c4,  0xc1d6,  0xd1d0,  0xe1da,  0xf1dc,
+	0x0154,  0x1152,  0x2158,  0x315e,  0x414c,  0x514a,  0x6140,  0x7146,
+	0x8164,  0x9162,  0xa168,  0xb16e,  0xc17c,  0xd17a,  0xe170,  0xf176,
+	0x8131,  0x9137,  0xa13d,  0xb13b,  0xc129,  0xd12f,  0xe125,  0xf123,
+	0x0101,  0x1107,  0x210d,  0x310b,  0x4119,  0x511f,  0x6115,  0x7113,
+	0x8333,  0x9335,  0xa33f,  0xb339,  0xc32b,  0xd32d,  0xe327,  0xf321,
+	0x0303,  0x1305,  0x230f,  0x3309,  0x431b,  0x531d,  0x6317,  0x7311,
+	0x0356,  0x1350,  0x235a,  0x335c,  0x434e,  0x5348,  0x6342,  0x7344,
+	0x8366,  0x9360,  0xa36a,  0xb36c,  0xc37e,  0xd378,  0xe372,  0xf374,
+	0x03fc,  0x13fa,  0x23f0,  0x33f6,  0x43e4,  0x53e2,  0x63e8,  0x73ee,
+	0x83cc,  0x93ca,  0xa3c0,  0xb3c6,  0xc3d4,  0xd3d2,  0xe3d8,  0xf3de,
+	0x8399,  0x939f,  0xa395,  0xb393,  0xc381,  0xd387,  0xe38d,  0xf38b,
+	0x03a9,  0x13af,  0x23a5,  0x33a3,  0x43b1,  0x53b7,  0x63bd,  0x73bb,
+	0x02a8,  0x12ae,  0x22a4,  0x32a2,  0x42b0,  0x52b6,  0x62bc,  0x72ba,
+	0x8298,  0x929e,  0xa294,  0xb292,  0xc280,  0xd286,  0xe28c,  0xf28a,
+	0x82cd,  0x92cb,  0xa2c1,  0xb2c7,  0xc2d5,  0xd2d3,  0xe2d9,  0xf2df,
+	0x02fd,  0x12fb,  0x22f1,  0x32f7,  0x42e5,  0x52e3,  0x62e9,  0x72ef,
+	0x8267,  0x9261,  0xa26b,  0xb26d,  0xc27f,  0xd279,  0xe273,  0xf275,
+	0x0257,  0x1251,  0x225b,  0x325d,  0x424f,  0x5249,  0x6243,  0x7245,
+	0x0202,  0x1204,  0x220e,  0x3208,  0x421a,  0x521c,  0x6216,  0x7210,
+	0x8232,  0x9234,  0xa23e,  0xb238,  0xc22a,  0xd22c,  0xe226,  0xf220 }
 };
 
-
-void FLAC__crc8_update(const FLAC__byte data, FLAC__uint8 *crc)
+#if 0
+void FLAC__crc16_init_table(void)
 {
-	*crc = FLAC__crc8_table[*crc ^ data];
-}
+	int i, j;
+	FLAC__uint16 polynomial, crc;
+	polynomial = 0x8005;
 
-void FLAC__crc8_update_block(const FLAC__byte *data, unsigned len, FLAC__uint8 *crc)
-{
-	while(len--)
-		*crc = FLAC__crc8_table[*crc ^ *data++];
-}
+	for(i = 0; i <= 0xFF; i++){
+		crc = i << 8;
 
-FLAC__uint8 FLAC__crc8(const FLAC__byte *data, unsigned len)
+		for(j = 0; j < 8; j++)
+			crc = (crc << 1) ^ (crc & (1 << 15) ? polynomial : 0);
+
+		FLAC__crc16_table[0][i] = crc;
+	}
+
+	for(i = 0; i <= 0xFF; i++)
+		for(j = 1; j < 8; j++)
+			FLAC__crc16_table[j][i] = FLAC__crc16_table[0][FLAC__crc16_table[j - 1][i] >> 8] ^ (FLAC__crc16_table[j - 1][i] << 8);
+}
+#endif
+
+FLAC__uint8 FLAC__crc8(const FLAC__byte *data, uint32_t len)
 {
 	FLAC__uint8 crc = 0;
 
@@ -132,12 +373,64 @@
 	return crc;
 }
 
-unsigned FLAC__crc16(const FLAC__byte *data, unsigned len)
+FLAC__uint16 FLAC__crc16(const FLAC__byte *data, uint32_t len)
 {
-	unsigned crc = 0;
+	FLAC__uint16 crc = 0;
+
+	while(len >= 8){
+		crc ^= data[0] << 8 | data[1];
+
+		crc = FLAC__crc16_table[7][crc >> 8] ^ FLAC__crc16_table[6][crc & 0xFF] ^
+		      FLAC__crc16_table[5][data[2] ] ^ FLAC__crc16_table[4][data[3]   ] ^
+		      FLAC__crc16_table[3][data[4] ] ^ FLAC__crc16_table[2][data[5]   ] ^
+		      FLAC__crc16_table[1][data[6] ] ^ FLAC__crc16_table[0][data[7]   ];
+
+		data += 8;
+		len -= 8;
+	}
 
 	while(len--)
-		crc = ((crc<<8) ^ FLAC__crc16_table[(crc>>8) ^ *data++]) & 0xffff;
+		crc = (crc<<8) ^ FLAC__crc16_table[0][(crc>>8) ^ *data++];
+
+	return crc;
+}
+
+FLAC__uint16 FLAC__crc16_update_words32(const FLAC__uint32 *words, uint32_t len, FLAC__uint16 crc)
+{
+	while (len >= 2) {
+		crc ^= words[0] >> 16;
+
+		crc = FLAC__crc16_table[7][crc >> 8               ] ^ FLAC__crc16_table[6][crc & 0xFF             ] ^
+		      FLAC__crc16_table[5][(words[0] >>  8) & 0xFF] ^ FLAC__crc16_table[4][ words[0]        & 0xFF] ^
+		      FLAC__crc16_table[3][ words[1] >> 24        ] ^ FLAC__crc16_table[2][(words[1] >> 16) & 0xFF] ^
+		      FLAC__crc16_table[1][(words[1] >>  8) & 0xFF] ^ FLAC__crc16_table[0][ words[1]        & 0xFF];
+
+		words += 2;
+		len -= 2;
+	}
+
+	if (len) {
+		crc ^= words[0] >> 16;
+
+		crc = FLAC__crc16_table[3][crc >> 8               ] ^ FLAC__crc16_table[2][crc & 0xFF             ] ^
+		      FLAC__crc16_table[1][(words[0] >>  8) & 0xFF] ^ FLAC__crc16_table[0][words[0]         & 0xFF];
+	}
+
+	return crc;
+}
+
+FLAC__uint16 FLAC__crc16_update_words64(const FLAC__uint64 *words, uint32_t len, FLAC__uint16 crc)
+{
+	while (len--) {
+		crc ^= words[0] >> 48;
+
+		crc = FLAC__crc16_table[7][crc >> 8               ] ^ FLAC__crc16_table[6][crc & 0xFF             ] ^
+		      FLAC__crc16_table[5][(words[0] >> 40) & 0xFF] ^ FLAC__crc16_table[4][(words[0] >> 32) & 0xFF] ^
+		      FLAC__crc16_table[3][(words[0] >> 24) & 0xFF] ^ FLAC__crc16_table[2][(words[0] >> 16) & 0xFF] ^
+		      FLAC__crc16_table[1][(words[0] >>  8) & 0xFF] ^ FLAC__crc16_table[0][ words[0]        & 0xFF];
+
+		words++;
+	}
 
 	return crc;
 }

diff --git a/src/libFLAC/deduplication/lpc_compute_autocorrelation_intrin.c b/src/libFLAC/deduplication/lpc_compute_autocorrelation_intrin.c
new file mode 100644
index 0000000..76419db
--- /dev/null
+++ b/src/libFLAC/deduplication/lpc_compute_autocorrelation_intrin.c

@@ -0,0 +1,14 @@
+	int i, j;
+	(void) lag;
+	FLAC__ASSERT(lag <= MAX_LAG);
+
+        for(i = 0; i < MAX_LAG; i++)
+                autoc[i] = 0.0;
+
+        for(i = 0; i < MAX_LAG; i++)
+                for(j = 0; j <= i; j++)
+                        autoc[j] += (double)data[i] * (double)data[i-j];
+
+        for(i = MAX_LAG; i < (int)data_len; i++)
+		for(j = 0; j < MAX_LAG; j++)
+	                autoc[j] += (double)data[i] * (double)data[i-j];

diff --git a/src/libFLAC/deduplication/lpc_compute_autocorrelation_intrin_neon.c b/src/libFLAC/deduplication/lpc_compute_autocorrelation_intrin_neon.c
new file mode 100644
index 0000000..4df3aee
--- /dev/null
+++ b/src/libFLAC/deduplication/lpc_compute_autocorrelation_intrin_neon.c

@@ -0,0 +1,70 @@
+	int i;
+	float64x2_t sum0 = vdupq_n_f64(0.0f);
+	float64x2_t sum1 = vdupq_n_f64(0.0f);
+	float64x2_t sum2 = vdupq_n_f64(0.0f);
+	float64x2_t sum3 = vdupq_n_f64(0.0f);
+	float64x2_t d0 = vdupq_n_f64(0.0f);
+	float64x2_t d1 = vdupq_n_f64(0.0f);
+	float64x2_t d2 = vdupq_n_f64(0.0f);
+	float64x2_t d3 = vdupq_n_f64(0.0f);
+#if MAX_LAG > 8
+	float64x2_t sum4 = vdupq_n_f64(0.0f);
+	float64x2_t d4 = vdupq_n_f64(0.0f);
+#endif
+#if MAX_LAG > 10
+	float64x2_t sum5 = vdupq_n_f64(0.0f);
+	float64x2_t sum6 = vdupq_n_f64(0.0f);
+	float64x2_t d5 = vdupq_n_f64(0.0f);
+	float64x2_t d6 = vdupq_n_f64(0.0f);
+#endif
+	float64x2_t d;
+
+	(void)lag;
+	FLAC__ASSERT(lag <= MAX_LAG);
+
+	// Loop backwards through samples from data_len to 0
+	for (i = data_len - 1; i >= 0; i--)
+	{
+		d = vdupq_n_f64(data[i]); // Create vector with 2 entries data[i]
+
+		// For MAX_LAG == 14, the next 6 lines shift the elements one position through the 7 vectors d0..d6.
+		// The 7th line inserts the newly loaded element into d0. This works like a queue, where
+		// data[i] is pushed in on every iteration and the oldest (15th) element falls off
+#if MAX_LAG > 10
+		d6 = vextq_f64(d5,d6,1);
+		d5 = vextq_f64(d4,d5,1);
+#endif
+#if MAX_LAG > 8
+		d4 = vextq_f64(d3,d4,1);
+#endif
+		d3 = vextq_f64(d2,d3,1);
+		d2 = vextq_f64(d1,d2,1);
+		d1 = vextq_f64(d0,d1,1);
+		d0 = vextq_f64(d,d0,1);
+
+		// Fused multiply-add sum += d * d0..d6
+		sum0 = vfmaq_f64(sum0, d, d0);
+		sum1 = vfmaq_f64(sum1, d, d1);
+		sum2 = vfmaq_f64(sum2, d, d2);
+		sum3 = vfmaq_f64(sum3, d, d3);
+#if MAX_LAG > 8
+		sum4 = vfmaq_f64(sum4, d, d4);
+#endif
+#if MAX_LAG > 10
+		sum5 = vfmaq_f64(sum5, d, d5);
+		sum6 = vfmaq_f64(sum6, d, d6);
+#endif
+	}
+
+    // Store sum0..sum6 in autoc[0..13] (for MAX_LAG == 14)
+    vst1q_f64(autoc, sum0);
+    vst1q_f64(autoc + 2, sum1);
+    vst1q_f64(autoc + 4, sum2);
+    vst1q_f64(autoc + 6, sum3);
+#if MAX_LAG > 8
+    vst1q_f64(autoc + 8, sum4);
+#endif
+#if MAX_LAG > 10
+    vst1q_f64(autoc + 10, sum5);
+    vst1q_f64(autoc + 12, sum6);
+#endif

diff --git a/src/libFLAC/deduplication/lpc_compute_autocorrelation_intrin_sse2.c b/src/libFLAC/deduplication/lpc_compute_autocorrelation_intrin_sse2.c
new file mode 100644
index 0000000..607b42f
--- /dev/null
+++ b/src/libFLAC/deduplication/lpc_compute_autocorrelation_intrin_sse2.c

@@ -0,0 +1,81 @@
+/* This code is imported several times in lpc_intrin_sse2.c with different
+ * values for MAX_LAG. Comments are for MAX_LAG == 14 */
+	int i;
+	__m128d sum0, sum1, sum2, sum3;
+	__m128d d0, d1, d2, d3;
+#if MAX_LAG > 8
+	__m128d d4;
+	__m128d sum4;
+#endif
+#if MAX_LAG > 10
+	__m128d d5, d6;
+	__m128d sum5, sum6;
+#endif
+
+	(void) lag;
+	FLAC__ASSERT(lag <= MAX_LAG);
+
+	/* Initialize all sum vectors with zero */
+	sum0 = _mm_setzero_pd();
+	sum1 = _mm_setzero_pd();
+	sum2 = _mm_setzero_pd();
+	sum3 = _mm_setzero_pd();
+	d0 = _mm_setzero_pd();
+	d1 = _mm_setzero_pd();
+	d2 = _mm_setzero_pd();
+	d3 = _mm_setzero_pd();
+#if MAX_LAG > 8
+	sum4 = _mm_setzero_pd();
+	d4 = _mm_setzero_pd();
+#endif
+#if MAX_LAG > 10
+	sum5 = _mm_setzero_pd();
+	sum6 = _mm_setzero_pd();
+	d5 = _mm_setzero_pd();
+	d6 = _mm_setzero_pd();
+#endif
+
+	/* Loop backwards through samples from data_len-1 down to 0 */
+	for(i = data_len-1; i >= 0; i--) {
+		__m128d d = _mm_set1_pd(data[i]);
+
+		/* The next lines of code work like a queue. For more
+		 * information see the lag8 version of this function */
+#if MAX_LAG > 10
+		d6 = _mm_shuffle_pd(d5, d6, _MM_SHUFFLE(0,0,0,1));
+		d5 = _mm_shuffle_pd(d4, d5, _MM_SHUFFLE(0,0,0,1));
+#endif
+#if MAX_LAG > 8
+		d4 = _mm_shuffle_pd(d3, d4, _MM_SHUFFLE(0,0,0,1));
+#endif
+		d3 = _mm_shuffle_pd(d2, d3, _MM_SHUFFLE(0,0,0,1));
+		d2 = _mm_shuffle_pd(d1, d2, _MM_SHUFFLE(0,0,0,1));
+		d1 = _mm_shuffle_pd(d0, d1, _MM_SHUFFLE(0,0,0,1));
+		d0 = _mm_shuffle_pd(d,  d0, _MM_SHUFFLE(0,0,0,1));
+
+		/* sumn += d*dn */
+		sum0 = _mm_add_pd(sum0, _mm_mul_pd(d, d0));
+		sum1 = _mm_add_pd(sum1, _mm_mul_pd(d, d1));
+		sum2 = _mm_add_pd(sum2, _mm_mul_pd(d, d2));
+		sum3 = _mm_add_pd(sum3, _mm_mul_pd(d, d3));
+#if MAX_LAG > 8
+		sum4 = _mm_add_pd(sum4, _mm_mul_pd(d, d4));
+#endif
+#if MAX_LAG > 10
+		sum5 = _mm_add_pd(sum5, _mm_mul_pd(d, d5));
+		sum6 = _mm_add_pd(sum6, _mm_mul_pd(d, d6));
+#endif
+	}
+
+	/* Store sum0..sum6 in autoc[0..13] (for MAX_LAG == 14) */
+	_mm_storeu_pd(autoc,   sum0);
+	_mm_storeu_pd(autoc+2, sum1);
+	_mm_storeu_pd(autoc+4, sum2);
+	_mm_storeu_pd(autoc+6 ,sum3);
+#if MAX_LAG > 8
+	_mm_storeu_pd(autoc+8, sum4);
+#endif
+#if MAX_LAG > 10
+	_mm_storeu_pd(autoc+10,sum5);
+	_mm_storeu_pd(autoc+12,sum6);
+#endif

diff --git a/src/libFLAC/deduplication/lpc_compute_autocorrelation_intrin_vsx.c b/src/libFLAC/deduplication/lpc_compute_autocorrelation_intrin_vsx.c
new file mode 100644
index 0000000..721d2a0
--- /dev/null
+++ b/src/libFLAC/deduplication/lpc_compute_autocorrelation_intrin_vsx.c

@@ -0,0 +1,179 @@
+/* This code is imported several times in lpc_intrin_vsx.c with different
+ * values for MAX_LAG. Comments are for MAX_LAG == 14 */
+
+long i;
+long limit = (long)data_len - MAX_LAG;
+const FLAC__real *base;
+vector double d0, d1, d2, d3;
+vector double sum0 = { 0.0f, 0.0f};
+vector double sum10 = { 0.0f, 0.0f};
+vector double sum1 = { 0.0f, 0.0f};
+vector double sum11 = { 0.0f, 0.0f};
+vector double sum2 = { 0.0f, 0.0f};
+vector double sum12 = { 0.0f, 0.0f};
+vector double sum3 = { 0.0f, 0.0f};
+vector double sum13 = { 0.0f, 0.0f};
+#if MAX_LAG > 8
+vector double d4;
+vector double sum4 = { 0.0f, 0.0f};
+vector double sum14 = { 0.0f, 0.0f};
+#endif
+#if MAX_LAG > 10
+vector double d5, d6;
+vector double sum5 = { 0.0f, 0.0f};
+vector double sum15 = { 0.0f, 0.0f};
+vector double sum6 = { 0.0f, 0.0f};
+vector double sum16 = { 0.0f, 0.0f};
+#endif
+
+vector float dtemp;
+
+#if WORDS_BIGENDIAN
+vector unsigned long long vperm = { 0x08090A0B0C0D0E0F, 0x1011121314151617 };
+vector unsigned long long vsel = { 0x0000000000000000, 0xFFFFFFFFFFFFFFFF };
+#else
+vector unsigned long long vperm = { 0x0F0E0D0C0B0A0908, 0x1716151413121110 };
+vector unsigned long long vsel = { 0xFFFFFFFFFFFFFFFF, 0x0000000000000000 };
+#endif
+
+(void) lag;
+FLAC__ASSERT(lag <= MAX_LAG);
+
+base = data;
+
+/* First, check whether it is possible to load
+ * 16 elements at once */
+if(limit > 2){
+	/* Convert all floats to doubles */
+	dtemp = vec_vsx_ld(0, base);
+	d0 = vec_doubleh(dtemp);
+	d1 = vec_doublel(dtemp);
+	dtemp = vec_vsx_ld(16, base);
+	d2 = vec_doubleh(dtemp);
+	d3 = vec_doublel(dtemp);
+#if MAX_LAG > 8
+	dtemp = vec_vsx_ld(32, base);
+	d4 = vec_doubleh(dtemp);
+#endif
+#if MAX_LAG > 10
+	d5 = vec_doublel(dtemp);
+	dtemp = vec_vsx_ld(48, base);
+	d6 = vec_doubleh(dtemp);
+#endif
+
+	base += MAX_LAG;
+
+	/* Loop until nearing data_len */
+	for (i = 0; i <= (limit-2); i += 2) {
+		vector double d, dnext;
+
+		/* Load next 2 datapoints and convert to double
+		 * for lag 14 that is data[i+14] and data[i+15] */
+		dtemp = vec_vsx_ld(0, base);
+		dnext = vec_doubleh(dtemp);
+		base += 2;
+
+		/* Create vector d with both elements set to the first
+		 * element of d0, so both elements data[i] */
+		d = vec_splat(d0, 0);
+		sum0 += d0 * d; // Multiply data[i] with data[i] and data[i+1]
+		sum1 += d1 * d; // Multiply data[i] with data[i+2] and data[i+3]
+		sum2 += d2 * d; // Multiply data[i] with data[i+4] and data[i+5]
+		sum3 += d3 * d; // Multiply data[i] with data[i+6] and data[i+7]
+#if MAX_LAG > 8
+		sum4 += d4 * d; // Multiply data[i] with data[i+8] and data[i+9]
+#endif
+#if MAX_LAG > 10
+		sum5 += d5 * d; // Multiply data[i] with data[i+10] and data[i+11]
+		sum6 += d6 * d; // Multiply data[i] with data[i+12] and data[i+13]
+#endif
+
+		/* Set both elements of d to data[i+1] */
+		d = vec_splat(d0, 1);
+
+		/* Set d0 to data[i+14] and data[i+1] */
+		d0 = vec_sel(d0, dnext, vsel);
+		sum10 += d0 * d; /* Multiply data[i+1] with data[i+14] and data[i+1] */
+		sum11 += d1 * d; /* Multiply data[i+1] with data[i+2] and data[i+3] */
+		sum12 += d2 * d;
+		sum13 += d3 * d;
+#if MAX_LAG > 8
+		sum14 += d4 * d;
+#endif
+#if MAX_LAG > 10
+		sum15 += d5 * d;
+		sum16 += d6 * d; /* Multiply data[i+1] with data[i+12] and data[i+13] */
+#endif
+
+		/* Shift all loaded values one vector (2 elements) so the next
+		 * iterations aligns again */
+		d0 = d1;
+		d1 = d2;
+		d2 = d3;
+#if MAX_LAG > 8
+		d3 = d4;
+#endif
+#if MAX_LAG > 10
+		d4 = d5;
+		d5 = d6;
+#endif
+
+#if MAX_LAG == 8
+		d3 = dnext;
+#elif MAX_LAG == 10
+		d4 = dnext;
+#elif MAX_LAG == 14
+		d6 = dnext;
+#else
+#error "Unsupported lag";
+#endif
+	}
+
+	/* Because the values in sum10..sum16 do not align with
+	 * the values in sum0..sum6, these need to be 'left-rotated'
+	 * before adding them to sum0..sum6 */
+	sum0 += vec_perm(sum10, sum11, (vector unsigned char)vperm);
+	sum1 += vec_perm(sum11, sum12, (vector unsigned char)vperm);
+	sum2 += vec_perm(sum12, sum13, (vector unsigned char)vperm);
+#if MAX_LAG > 8
+	sum3 += vec_perm(sum13, sum14, (vector unsigned char)vperm);
+#endif
+#if MAX_LAG > 10
+	sum4 += vec_perm(sum14, sum15, (vector unsigned char)vperm);
+	sum5 += vec_perm(sum15, sum16, (vector unsigned char)vperm);
+#endif
+
+#if MAX_LAG == 8
+	sum3 += vec_perm(sum13, sum10, (vector unsigned char)vperm);
+#elif MAX_LAG == 10
+	sum4 += vec_perm(sum14, sum10, (vector unsigned char)vperm);
+#elif MAX_LAG == 14
+	sum6 += vec_perm(sum16, sum10, (vector unsigned char)vperm);
+#else
+#error "Unsupported lag";
+#endif
+}else{
+	i = 0;
+}
+
+/* Store result */
+vec_vsx_st(sum0, 0, autoc);
+vec_vsx_st(sum1, 16, autoc);
+vec_vsx_st(sum2, 32, autoc);
+vec_vsx_st(sum3, 48, autoc);
+#if MAX_LAG > 8
+vec_vsx_st(sum4, 64, autoc);
+#endif
+#if MAX_LAG > 10
+vec_vsx_st(sum5, 80, autoc);
+vec_vsx_st(sum6, 96, autoc);
+#endif
+
+/* Process remainder of samples in a non-VSX way; only the MAX_LAG entries of autoc stored above are updated */
+for (; i < (long)data_len; i++) {
+	uint32_t coeff;
+
+	FLAC__real d = data[i];
+	for (coeff = 0; coeff < data_len - i && coeff < MAX_LAG; coeff++) /* stay within lags 0..MAX_LAG-1 */
+		autoc[coeff] += d * data[i+coeff];
+}

diff --git a/src/libFLAC/fixed.c b/src/libFLAC/fixed.c
index 74b31b3..da5c7da 100644
--- a/src/libFLAC/fixed.c
+++ b/src/libFLAC/fixed.c

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -45,7 +45,12 @@
 #ifdef local_abs
 #undef local_abs
 #endif
-#define local_abs(x) ((unsigned)((x)<0? -(x) : (x)))
+#define local_abs(x) ((uint32_t)((x)<0? -(x) : (x)))
+
+#ifdef local_abs64
+#undef local_abs64
+#endif
+#define local_abs64(x) ((uint64_t)((x)<0? -(x) : (x)))
 
 #ifdef FLAC__INTEGER_ONLY_LIBRARY
 /* rbps stands for residual bits per sample
@@ -57,7 +62,7 @@
 static FLAC__fixedpoint local__compute_rbps_integerized(FLAC__uint32 err, FLAC__uint32 n)
 {
 	FLAC__uint32 rbps;
-	unsigned bits; /* the number of bits required to represent a number */
+	uint32_t bits; /* the number of bits required to represent a number */
 	int fracbits; /* the number of bits of rbps that comprise the fractional part */
 
 	FLAC__ASSERT(sizeof(rbps) == sizeof(FLAC__fixedpoint));
@@ -105,7 +110,7 @@
 		}
 	}
 
-	rbps = FLAC__fixedpoint_log2(rbps, fracbits, (unsigned)(-1));
+	rbps = FLAC__fixedpoint_log2(rbps, fracbits, (uint32_t)(-1));
 
 	if(rbps == 0)
 		return 0;
@@ -136,7 +141,7 @@
 static FLAC__fixedpoint local__compute_rbps_wide_integerized(FLAC__uint64 err, FLAC__uint32 n)
 {
 	FLAC__uint32 rbps;
-	unsigned bits; /* the number of bits required to represent a number */
+	uint32_t bits; /* the number of bits required to represent a number */
 	int fracbits; /* the number of bits of rbps that comprise the fractional part */
 
 	FLAC__ASSERT(sizeof(rbps) == sizeof(FLAC__fixedpoint));
@@ -184,7 +189,7 @@
 		}
 	}
 
-	rbps = FLAC__fixedpoint_log2(rbps, fracbits, (unsigned)(-1));
+	rbps = FLAC__fixedpoint_log2(rbps, fracbits, (uint32_t)(-1));
 
 	if(rbps == 0)
 		return 0;
@@ -214,19 +219,28 @@
 #endif
 
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
-unsigned FLAC__fixed_compute_best_predictor(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1])
+uint32_t FLAC__fixed_compute_best_predictor(const FLAC__int32 data[], uint32_t data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1])
 #else
-unsigned FLAC__fixed_compute_best_predictor(const FLAC__int32 data[], unsigned data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1])
+uint32_t FLAC__fixed_compute_best_predictor(const FLAC__int32 data[], uint32_t data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1])
 #endif
 {
+	FLAC__uint32 total_error_0 = 0, total_error_1 = 0, total_error_2 = 0, total_error_3 = 0, total_error_4 = 0;
+	uint32_t order;
+#if 0
+	/* This code has been around a long time, and was written when compilers weren't able
+	 * to vectorize code. These days, compilers are better in optimizing the next block
+	 * which is also much more readable
+	 */
 	FLAC__int32 last_error_0 = data[-1];
 	FLAC__int32 last_error_1 = data[-1] - data[-2];
 	FLAC__int32 last_error_2 = last_error_1 - (data[-2] - data[-3]);
 	FLAC__int32 last_error_3 = last_error_2 - (data[-2] - 2*data[-3] + data[-4]);
 	FLAC__int32 error, save;
-	FLAC__uint32 total_error_0 = 0, total_error_1 = 0, total_error_2 = 0, total_error_3 = 0, total_error_4 = 0;
-	unsigned i, order;
-
+	uint32_t i;
+	/* total_error_* are 32 bits wide in this function; the _wide
+	 * variant uses 64-bit totals to avoid overflow when encoding
+	 * erratic signals with large bits-per-sample and blocksize.
+	 */
 	for(i = 0; i < data_len; i++) {
 		error  = data[i]     ; total_error_0 += local_abs(error);                      save = error;
 		error -= last_error_0; total_error_1 += local_abs(error); last_error_0 = save; save = error;
@@ -234,14 +248,26 @@
 		error -= last_error_2; total_error_3 += local_abs(error); last_error_2 = save; save = error;
 		error -= last_error_3; total_error_4 += local_abs(error); last_error_3 = save;
 	}
+#else
+	int i;
+	for(i = 0; i < (int)data_len; i++) {
+		total_error_0 += local_abs(data[i]);
+		total_error_1 += local_abs(data[i] - data[i-1]);
+		total_error_2 += local_abs(data[i] - 2 * data[i-1] + data[i-2]);
+		total_error_3 += local_abs(data[i] - 3 * data[i-1] + 3 * data[i-2] - data[i-3]);
+		total_error_4 += local_abs(data[i] - 4 * data[i-1] + 6 * data[i-2] - 4 * data[i-3] + data[i-4]);
+	}
+#endif
 
-	if(total_error_0 < flac_min(flac_min(flac_min(total_error_1, total_error_2), total_error_3), total_error_4))
+
+	/* prefer lower order */
+	if(total_error_0 <= flac_min(flac_min(flac_min(total_error_1, total_error_2), total_error_3), total_error_4))
 		order = 0;
-	else if(total_error_1 < flac_min(flac_min(total_error_2, total_error_3), total_error_4))
+	else if(total_error_1 <= flac_min(flac_min(total_error_2, total_error_3), total_error_4))
 		order = 1;
-	else if(total_error_2 < flac_min(total_error_3, total_error_4))
+	else if(total_error_2 <= flac_min(total_error_3, total_error_4))
 		order = 2;
-	else if(total_error_3 < total_error_4)
+	else if(total_error_3 <= total_error_4)
 		order = 3;
 	else
 		order = 4;
@@ -255,11 +281,11 @@
 	FLAC__ASSERT(data_len > 0 || total_error_3 == 0);
 	FLAC__ASSERT(data_len > 0 || total_error_4 == 0);
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
-	residual_bits_per_sample[0] = (FLAC__float)((total_error_0 > 0) ? log(M_LN2 * (FLAC__double)total_error_0 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	residual_bits_per_sample[1] = (FLAC__float)((total_error_1 > 0) ? log(M_LN2 * (FLAC__double)total_error_1 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	residual_bits_per_sample[2] = (FLAC__float)((total_error_2 > 0) ? log(M_LN2 * (FLAC__double)total_error_2 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	residual_bits_per_sample[3] = (FLAC__float)((total_error_3 > 0) ? log(M_LN2 * (FLAC__double)total_error_3 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	residual_bits_per_sample[4] = (FLAC__float)((total_error_4 > 0) ? log(M_LN2 * (FLAC__double)total_error_4 / (FLAC__double)data_len) / M_LN2 : 0.0);
+	residual_bits_per_sample[0] = (float)((total_error_0 > 0) ? log(M_LN2 * (double)total_error_0 / (double)data_len) / M_LN2 : 0.0);
+	residual_bits_per_sample[1] = (float)((total_error_1 > 0) ? log(M_LN2 * (double)total_error_1 / (double)data_len) / M_LN2 : 0.0);
+	residual_bits_per_sample[2] = (float)((total_error_2 > 0) ? log(M_LN2 * (double)total_error_2 / (double)data_len) / M_LN2 : 0.0);
+	residual_bits_per_sample[3] = (float)((total_error_3 > 0) ? log(M_LN2 * (double)total_error_3 / (double)data_len) / M_LN2 : 0.0);
+	residual_bits_per_sample[4] = (float)((total_error_4 > 0) ? log(M_LN2 * (double)total_error_4 / (double)data_len) / M_LN2 : 0.0);
 #else
 	residual_bits_per_sample[0] = (total_error_0 > 0) ? local__compute_rbps_integerized(total_error_0, data_len) : 0;
 	residual_bits_per_sample[1] = (total_error_1 > 0) ? local__compute_rbps_integerized(total_error_1, data_len) : 0;
@@ -272,38 +298,31 @@
 }
 
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
-unsigned FLAC__fixed_compute_best_predictor_wide(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1])
+uint32_t FLAC__fixed_compute_best_predictor_wide(const FLAC__int32 data[], uint32_t data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1])
 #else
-unsigned FLAC__fixed_compute_best_predictor_wide(const FLAC__int32 data[], unsigned data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1])
+uint32_t FLAC__fixed_compute_best_predictor_wide(const FLAC__int32 data[], uint32_t data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1])
 #endif
 {
-	FLAC__int32 last_error_0 = data[-1];
-	FLAC__int32 last_error_1 = data[-1] - data[-2];
-	FLAC__int32 last_error_2 = last_error_1 - (data[-2] - data[-3]);
-	FLAC__int32 last_error_3 = last_error_2 - (data[-2] - 2*data[-3] + data[-4]);
-	FLAC__int32 error, save;
-	/* total_error_* are 64-bits to avoid overflow when encoding
-	 * erratic signals when the bits-per-sample and blocksize are
-	 * large.
-	 */
 	FLAC__uint64 total_error_0 = 0, total_error_1 = 0, total_error_2 = 0, total_error_3 = 0, total_error_4 = 0;
-	unsigned i, order;
+	uint32_t order;
+	int i;
 
-	for(i = 0; i < data_len; i++) {
-		error  = data[i]     ; total_error_0 += local_abs(error);                      save = error;
-		error -= last_error_0; total_error_1 += local_abs(error); last_error_0 = save; save = error;
-		error -= last_error_1; total_error_2 += local_abs(error); last_error_1 = save; save = error;
-		error -= last_error_2; total_error_3 += local_abs(error); last_error_2 = save; save = error;
-		error -= last_error_3; total_error_4 += local_abs(error); last_error_3 = save;
+	for(i = 0; i < (int)data_len; i++) {
+		total_error_0 += local_abs(data[i]);
+		total_error_1 += local_abs(data[i] - data[i-1]);
+		total_error_2 += local_abs(data[i] - 2 * data[i-1] + data[i-2]);
+		total_error_3 += local_abs(data[i] - 3 * data[i-1] + 3 * data[i-2] - data[i-3]);
+		total_error_4 += local_abs(data[i] - 4 * data[i-1] + 6 * data[i-2] - 4 * data[i-3] + data[i-4]);
 	}
 
-	if(total_error_0 < flac_min(flac_min(flac_min(total_error_1, total_error_2), total_error_3), total_error_4))
+	/* prefer lower order */
+	if(total_error_0 <= flac_min(flac_min(flac_min(total_error_1, total_error_2), total_error_3), total_error_4))
 		order = 0;
-	else if(total_error_1 < flac_min(flac_min(total_error_2, total_error_3), total_error_4))
+	else if(total_error_1 <= flac_min(flac_min(total_error_2, total_error_3), total_error_4))
 		order = 1;
-	else if(total_error_2 < flac_min(total_error_3, total_error_4))
+	else if(total_error_2 <= flac_min(total_error_3, total_error_4))
 		order = 2;
-	else if(total_error_3 < total_error_4)
+	else if(total_error_3 <= total_error_4)
 		order = 3;
 	else
 		order = 4;
@@ -317,11 +336,11 @@
 	FLAC__ASSERT(data_len > 0 || total_error_3 == 0);
 	FLAC__ASSERT(data_len > 0 || total_error_4 == 0);
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
-	residual_bits_per_sample[0] = (FLAC__float)((total_error_0 > 0) ? log(M_LN2 * (FLAC__double)total_error_0 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	residual_bits_per_sample[1] = (FLAC__float)((total_error_1 > 0) ? log(M_LN2 * (FLAC__double)total_error_1 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	residual_bits_per_sample[2] = (FLAC__float)((total_error_2 > 0) ? log(M_LN2 * (FLAC__double)total_error_2 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	residual_bits_per_sample[3] = (FLAC__float)((total_error_3 > 0) ? log(M_LN2 * (FLAC__double)total_error_3 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	residual_bits_per_sample[4] = (FLAC__float)((total_error_4 > 0) ? log(M_LN2 * (FLAC__double)total_error_4 / (FLAC__double)data_len) / M_LN2 : 0.0);
+	residual_bits_per_sample[0] = (float)((total_error_0 > 0) ? log(M_LN2 * (double)total_error_0 / (double)data_len) / M_LN2 : 0.0);
+	residual_bits_per_sample[1] = (float)((total_error_1 > 0) ? log(M_LN2 * (double)total_error_1 / (double)data_len) / M_LN2 : 0.0);
+	residual_bits_per_sample[2] = (float)((total_error_2 > 0) ? log(M_LN2 * (double)total_error_2 / (double)data_len) / M_LN2 : 0.0);
+	residual_bits_per_sample[3] = (float)((total_error_3 > 0) ? log(M_LN2 * (double)total_error_3 / (double)data_len) / M_LN2 : 0.0);
+	residual_bits_per_sample[4] = (float)((total_error_4 > 0) ? log(M_LN2 * (double)total_error_4 / (double)data_len) / M_LN2 : 0.0);
 #else
 	residual_bits_per_sample[0] = (total_error_0 > 0) ? local__compute_rbps_wide_integerized(total_error_0, data_len) : 0;
 	residual_bits_per_sample[1] = (total_error_1 > 0) ? local__compute_rbps_wide_integerized(total_error_1, data_len) : 0;
@@ -333,7 +352,122 @@
 	return order;
 }
 
-void FLAC__fixed_compute_residual(const FLAC__int32 data[], unsigned data_len, unsigned order, FLAC__int32 residual[])
+#ifndef FLAC__INTEGER_ONLY_LIBRARY
+#define CHECK_ORDER_IS_VALID(macro_order) /* uses order, smallest_error, data_len, residual_bits_per_sample[] and the total_error_N / order_N_is_valid locals of the expanding function */ \
+if(order_##macro_order##_is_valid && total_error_##macro_order < smallest_error) { \
+	order = macro_order; /* lowest total error among the valid orders checked so far */ \
+	smallest_error = total_error_##macro_order ;	\
+	residual_bits_per_sample[ macro_order ] = (float)((total_error_##macro_order > 0) ? log(M_LN2 * (double)total_error_##macro_order / (double)data_len) / M_LN2 : 0.0); /* estimate from this order's own error sum */ \
+}							\
+else /* order is invalid or not better than the current best: report a fixed 34-bit estimate */ \
+	residual_bits_per_sample[ macro_order ] = 34.0f;
+#else
+#define CHECK_ORDER_IS_VALID(macro_order)		\
+if(order_##macro_order##_is_valid && total_error_##macro_order < smallest_error) { \
+	order = macro_order;				\
+	smallest_error = total_error_##macro_order ;	\
+	residual_bits_per_sample[ macro_order ] = (total_error_##macro_order > 0) ? local__compute_rbps_wide_integerized(total_error_##macro_order, data_len) : 0; \
+}							\
+else							\
+	residual_bits_per_sample[ macro_order ] = 34 * FLAC__FP_ONE;
+#endif
+
+
+#ifndef FLAC__INTEGER_ONLY_LIBRARY
+uint32_t FLAC__fixed_compute_best_predictor_limit_residual(const FLAC__int32 data[], uint32_t data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1])
+#else
+uint32_t FLAC__fixed_compute_best_predictor_limit_residual(const FLAC__int32 data[], uint32_t data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1])
+#endif
+{
+	FLAC__uint64 total_error_0 = 0, total_error_1 = 0, total_error_2 = 0, total_error_3 = 0, total_error_4 = 0, smallest_error = UINT64_MAX;
+	FLAC__uint64 error_0, error_1, error_2, error_3, error_4;
+	FLAC__bool order_0_is_valid = true, order_1_is_valid = true, order_2_is_valid = true, order_3_is_valid = true, order_4_is_valid = true;
+	uint32_t order = 0;
+	int i;
+
+	for(i = 0; i < (int)data_len; i++) {
+		error_0 = local_abs64((FLAC__int64)data[i]);
+		error_1 = (i > 0) ? local_abs64((FLAC__int64)data[i] - data[i-1]) : 0 ;
+		error_2 = (i > 1) ? local_abs64((FLAC__int64)data[i] - 2 * (FLAC__int64)data[i-1] + data[i-2]) : 0;
+		error_3 = (i > 2) ? local_abs64((FLAC__int64)data[i] - 3 * (FLAC__int64)data[i-1] + 3 * (FLAC__int64)data[i-2] - data[i-3]) : 0;
+		error_4 = (i > 3) ? local_abs64((FLAC__int64)data[i] - 4 * (FLAC__int64)data[i-1] + 6 * (FLAC__int64)data[i-2] - 4 * (FLAC__int64)data[i-3] + data[i-4]) : 0;
+
+		total_error_0 += error_0;
+		total_error_1 += error_1;
+		total_error_2 += error_2;
+		total_error_3 += error_3;
+		total_error_4 += error_4;
+
+		/* residual must not be INT32_MIN because abs(INT32_MIN) is undefined */
+		if(error_0 > INT32_MAX)
+			order_0_is_valid = false;
+		if(error_1 > INT32_MAX)
+			order_1_is_valid = false;
+		if(error_2 > INT32_MAX)
+			order_2_is_valid = false;
+		if(error_3 > INT32_MAX)
+			order_3_is_valid = false;
+		if(error_4 > INT32_MAX)
+			order_4_is_valid = false;
+	}
+
+	CHECK_ORDER_IS_VALID(0);
+	CHECK_ORDER_IS_VALID(1);
+	CHECK_ORDER_IS_VALID(2);
+	CHECK_ORDER_IS_VALID(3);
+	CHECK_ORDER_IS_VALID(4);
+
+	return order;
+}
+
+#ifndef FLAC__INTEGER_ONLY_LIBRARY
+uint32_t FLAC__fixed_compute_best_predictor_limit_residual_33bit(const FLAC__int64 data[], uint32_t data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1])
+#else
+uint32_t FLAC__fixed_compute_best_predictor_limit_residual_33bit(const FLAC__int64 data[], uint32_t data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1])
+#endif
+{
+	FLAC__uint64 total_error_0 = 0, total_error_1 = 0, total_error_2 = 0, total_error_3 = 0, total_error_4 = 0, smallest_error = UINT64_MAX;
+	FLAC__uint64 error_0, error_1, error_2, error_3, error_4;
+	FLAC__bool order_0_is_valid = true, order_1_is_valid = true, order_2_is_valid = true, order_3_is_valid = true, order_4_is_valid = true;
+	uint32_t order = 0;
+	int i;
+
+	for(i = 0; i < (int)data_len; i++) {
+		error_0 = local_abs64(data[i]);
+		error_1 = (i > 0) ? local_abs64(data[i] - data[i-1]) : 0 ;
+		error_2 = (i > 1) ? local_abs64(data[i] - 2 * data[i-1] + data[i-2]) : 0;
+		error_3 = (i > 2) ? local_abs64(data[i] - 3 * data[i-1] + 3 * data[i-2] - data[i-3]) : 0;
+		error_4 = (i > 3) ? local_abs64(data[i] - 4 * data[i-1] + 6 * data[i-2] - 4 * data[i-3] + data[i-4]) : 0;
+
+		total_error_0 += error_0;
+		total_error_1 += error_1;
+		total_error_2 += error_2;
+		total_error_3 += error_3;
+		total_error_4 += error_4;
+
+		/* residual must not be INT32_MIN because abs(INT32_MIN) is undefined */
+		if(error_0 > INT32_MAX)
+			order_0_is_valid = false;
+		if(error_1 > INT32_MAX)
+			order_1_is_valid = false;
+		if(error_2 > INT32_MAX)
+			order_2_is_valid = false;
+		if(error_3 > INT32_MAX)
+			order_3_is_valid = false;
+		if(error_4 > INT32_MAX)
+			order_4_is_valid = false;
+	}
+
+	CHECK_ORDER_IS_VALID(0);
+	CHECK_ORDER_IS_VALID(1);
+	CHECK_ORDER_IS_VALID(2);
+	CHECK_ORDER_IS_VALID(3);
+	CHECK_ORDER_IS_VALID(4);
+
+	return order;
+}
+
+void FLAC__fixed_compute_residual(const FLAC__int32 data[], uint32_t data_len, uint32_t order, FLAC__int32 residual[])
 {
 	const int idata_len = (int)data_len;
 	int i;
@@ -349,34 +483,92 @@
 			break;
 		case 2:
 			for(i = 0; i < idata_len; i++)
-#if 1 /* OPT: may be faster with some compilers on some systems */
-				residual[i] = data[i] - (data[i-1] << 1) + data[i-2];
-#else
 				residual[i] = data[i] - 2*data[i-1] + data[i-2];
-#endif
 			break;
 		case 3:
 			for(i = 0; i < idata_len; i++)
-#if 1 /* OPT: may be faster with some compilers on some systems */
-				residual[i] = data[i] - (((data[i-1]-data[i-2])<<1) + (data[i-1]-data[i-2])) - data[i-3];
-#else
 				residual[i] = data[i] - 3*data[i-1] + 3*data[i-2] - data[i-3];
-#endif
 			break;
 		case 4:
 			for(i = 0; i < idata_len; i++)
-#if 1 /* OPT: may be faster with some compilers on some systems */
-				residual[i] = data[i] - ((data[i-1]+data[i-3])<<2) + ((data[i-2]<<2) + (data[i-2]<<1)) + data[i-4];
-#else
 				residual[i] = data[i] - 4*data[i-1] + 6*data[i-2] - 4*data[i-3] + data[i-4];
-#endif
 			break;
 		default:
 			FLAC__ASSERT(0);
 	}
 }
 
-void FLAC__fixed_restore_signal(const FLAC__int32 residual[], unsigned data_len, unsigned order, FLAC__int32 data[])
+void FLAC__fixed_compute_residual_wide(const FLAC__int32 data[], uint32_t data_len, uint32_t order, FLAC__int32 residual[])
+{
+	const int idata_len = (int)data_len;
+	int i;
+
+	switch(order) {
+		case 0:
+			FLAC__ASSERT(sizeof(residual[0]) == sizeof(data[0]));
+			memcpy(residual, data, sizeof(residual[0])*data_len);
+			break;
+		case 1:
+			for(i = 0; i < idata_len; i++)
+				residual[i] = (FLAC__int64)data[i] - data[i-1];
+			break;
+		case 2:
+			for(i = 0; i < idata_len; i++)
+				residual[i] = (FLAC__int64)data[i] - 2*(FLAC__int64)data[i-1] + data[i-2];
+			break;
+		case 3:
+			for(i = 0; i < idata_len; i++)
+				residual[i] = (FLAC__int64)data[i] - 3*(FLAC__int64)data[i-1] + 3*(FLAC__int64)data[i-2] - data[i-3];
+			break;
+		case 4:
+			for(i = 0; i < idata_len; i++)
+				residual[i] = (FLAC__int64)data[i] - 4*(FLAC__int64)data[i-1] + 6*(FLAC__int64)data[i-2] - 4*(FLAC__int64)data[i-3] + data[i-4];
+			break;
+		default:
+			FLAC__ASSERT(0);
+	}
+}
+
+void FLAC__fixed_compute_residual_wide_33bit(const FLAC__int64 data[], uint32_t data_len, uint32_t order, FLAC__int32 residual[])
+{
+	const int idata_len = (int)data_len;
+	int i;
+
+	switch(order) {
+		case 0:
+			for(i = 0; i < idata_len; i++)
+				residual[i] = data[i];
+			break;
+		case 1:
+			for(i = 0; i < idata_len; i++)
+				residual[i] = data[i] - data[i-1];
+			break;
+		case 2:
+			for(i = 0; i < idata_len; i++)
+				residual[i] = data[i] - 2*data[i-1] + data[i-2];
+			break;
+		case 3:
+			for(i = 0; i < idata_len; i++)
+				residual[i] = data[i] - 3*data[i-1] + 3*data[i-2] - data[i-3];
+			break;
+		case 4:
+			for(i = 0; i < idata_len; i++)
+				residual[i] = data[i] - 4*data[i-1] + 6*data[i-2] - 4*data[i-3] + data[i-4];
+			break;
+		default:
+			FLAC__ASSERT(0);
+	}
+}
+
+#ifdef FUZZING_BUILD_MODE_NO_SANITIZE_SIGNED_INTEGER_OVERFLOW
+/* The attribute below is to silence the undefined sanitizer of oss-fuzz.
+ * Because fuzzing feeds bogus predictors and residual samples to the
+ * decoder, having overflows in this section is unavoidable. Also,
+ * because the calculated values are audio path only, there is no
+ * potential for security problems */
+__attribute__((no_sanitize("signed-integer-overflow")))
+#endif
+void FLAC__fixed_restore_signal(const FLAC__int32 residual[], uint32_t data_len, uint32_t order, FLAC__int32 data[])
 {
 	int i, idata_len = (int)data_len;
 
@@ -391,27 +583,83 @@
 			break;
 		case 2:
 			for(i = 0; i < idata_len; i++)
-#if 1 /* OPT: may be faster with some compilers on some systems */
-				data[i] = residual[i] + (data[i-1]<<1) - data[i-2];
-#else
 				data[i] = residual[i] + 2*data[i-1] - data[i-2];
-#endif
 			break;
 		case 3:
 			for(i = 0; i < idata_len; i++)
-#if 1 /* OPT: may be faster with some compilers on some systems */
-				data[i] = residual[i] + (((data[i-1]-data[i-2])<<1) + (data[i-1]-data[i-2])) + data[i-3];
-#else
 				data[i] = residual[i] + 3*data[i-1] - 3*data[i-2] + data[i-3];
-#endif
 			break;
 		case 4:
 			for(i = 0; i < idata_len; i++)
-#if 1 /* OPT: may be faster with some compilers on some systems */
-				data[i] = residual[i] + ((data[i-1]+data[i-3])<<2) - ((data[i-2]<<2) + (data[i-2]<<1)) - data[i-4];
-#else
 				data[i] = residual[i] + 4*data[i-1] - 6*data[i-2] + 4*data[i-3] - data[i-4];
+			break;
+		default:
+			FLAC__ASSERT(0);
+	}
+}
+
+void FLAC__fixed_restore_signal_wide(const FLAC__int32 residual[], uint32_t data_len, uint32_t order, FLAC__int32 data[])
+{
+	int i, idata_len = (int)data_len;
+
+	switch(order) {
+		case 0:
+			FLAC__ASSERT(sizeof(residual[0]) == sizeof(data[0]));
+			memcpy(data, residual, sizeof(residual[0])*data_len);
+			break;
+		case 1:
+			for(i = 0; i < idata_len; i++)
+				data[i] = (FLAC__int64)residual[i] + (FLAC__int64)data[i-1];
+			break;
+		case 2:
+			for(i = 0; i < idata_len; i++)
+				data[i] = (FLAC__int64)residual[i] + 2*(FLAC__int64)data[i-1] - (FLAC__int64)data[i-2];
+			break;
+		case 3:
+			for(i = 0; i < idata_len; i++)
+				data[i] = (FLAC__int64)residual[i] + 3*(FLAC__int64)data[i-1] - 3*(FLAC__int64)data[i-2] + (FLAC__int64)data[i-3];
+			break;
+		case 4:
+			for(i = 0; i < idata_len; i++)
+				data[i] = (FLAC__int64)residual[i] + 4*(FLAC__int64)data[i-1] - 6*(FLAC__int64)data[i-2] + 4*(FLAC__int64)data[i-3] - (FLAC__int64)data[i-4];
+			break;
+		default:
+			FLAC__ASSERT(0);
+	}
+}
+
+#ifdef FUZZING_BUILD_MODE_NO_SANITIZE_SIGNED_INTEGER_OVERFLOW
+/* The attribute below is to silence the undefined sanitizer of oss-fuzz.
+ * Because fuzzing feeds bogus predictors and residual samples to the
+ * decoder, having overflows in this section is unavoidable. Also,
+ * because the calculated values are audio path only, there is no
+ * potential for security problems */
+__attribute__((no_sanitize("signed-integer-overflow")))
 #endif
+void FLAC__fixed_restore_signal_wide_33bit(const FLAC__int32 residual[], uint32_t data_len, uint32_t order, FLAC__int64 data[])
+{
+	int i, idata_len = (int)data_len;
+
+	switch(order) {
+		case 0:
+			for(i = 0; i < idata_len; i++)
+				data[i] = residual[i];
+			break;
+		case 1:
+			for(i = 0; i < idata_len; i++)
+				data[i] = (FLAC__int64)residual[i] + data[i-1];
+			break;
+		case 2:
+			for(i = 0; i < idata_len; i++)
+				data[i] = (FLAC__int64)residual[i] + 2*data[i-1] - data[i-2];
+			break;
+		case 3:
+			for(i = 0; i < idata_len; i++)
+				data[i] = (FLAC__int64)residual[i] + 3*data[i-1] - 3*data[i-2] + data[i-3];
+			break;
+		case 4:
+			for(i = 0; i < idata_len; i++)
+				data[i] = (FLAC__int64)residual[i] + 4*data[i-1] - 6*data[i-2] + 4*data[i-3] - data[i-4];
 			break;
 		default:
 			FLAC__ASSERT(0);

diff --git a/src/libFLAC/fixed_intrin_sse2.c b/src/libFLAC/fixed_intrin_sse2.c
index 6785f92..3b50895 100644
--- a/src/libFLAC/fixed_intrin_sse2.c
+++ b/src/libFLAC/fixed_intrin_sse2.c

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -34,6 +34,8 @@
 #  include <config.h>
 #endif
 
+#include "private/cpu.h"
+
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
 #ifndef FLAC__NO_ASM
 #if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
@@ -52,81 +54,118 @@
 #define m128i_to_i64(dest, src) dest = _mm_cvtsi128_si64(src)
 #endif
 
+#ifdef local_abs
+#undef local_abs
+#endif
+#define local_abs(x) ((uint32_t)((x)<0? -(x) : (x)))
+
 FLAC__SSE_TARGET("sse2")
-unsigned FLAC__fixed_compute_best_predictor_intrin_sse2(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1])
+uint32_t FLAC__fixed_compute_best_predictor_intrin_sse2(const FLAC__int32 data[], uint32_t data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1])
 {
 	FLAC__uint32 total_error_0, total_error_1, total_error_2, total_error_3, total_error_4;
-	unsigned i, order;
+	FLAC__int32 i, data_len_int;
+	uint32_t order;
+	__m128i total_err0, total_err1, total_err2, total_err3, total_err4;
+	__m128i prev_err0,  prev_err1,  prev_err2,  prev_err3;
+	__m128i tempA, tempB, bitmask;
+	FLAC__int32 data_scalar[4];
+	FLAC__int32 prev_err0_scalar[4];
+	FLAC__int32 prev_err1_scalar[4];
+	FLAC__int32 prev_err2_scalar[4];
+	FLAC__int32 prev_err3_scalar[4];
+	total_err0 = _mm_setzero_si128();
+	total_err1 = _mm_setzero_si128();
+	total_err2 = _mm_setzero_si128();
+	total_err3 = _mm_setzero_si128();
+	total_err4 = _mm_setzero_si128();
+	data_len_int = data_len;
 
-	__m128i total_err0, total_err1, total_err2;
+	for(i = 0; i < 4; i++){
+		prev_err0_scalar[i] = data[-1+i*(data_len_int/4)];
+		prev_err1_scalar[i] = data[-1+i*(data_len_int/4)] - data[-2+i*(data_len_int/4)];
+		prev_err2_scalar[i] = prev_err1_scalar[i] - (data[-2+i*(data_len_int/4)] - data[-3+i*(data_len_int/4)]);
+		prev_err3_scalar[i] = prev_err2_scalar[i] - (data[-2+i*(data_len_int/4)] - 2*data[-3+i*(data_len_int/4)] + data[-4+i*(data_len_int/4)]);
+	}
+	prev_err0 = _mm_loadu_si128((const __m128i*)prev_err0_scalar);
+	prev_err1 = _mm_loadu_si128((const __m128i*)prev_err1_scalar);
+	prev_err2 = _mm_loadu_si128((const __m128i*)prev_err2_scalar);
+	prev_err3 = _mm_loadu_si128((const __m128i*)prev_err3_scalar);
+	for(i = 0; i < data_len_int / 4; i++){
+		data_scalar[0] = data[i];
+		data_scalar[1] = data[i+data_len/4];
+		data_scalar[2] = data[i+2*(data_len/4)];
+		data_scalar[3] = data[i+3*(data_len/4)];
+		tempA = _mm_loadu_si128((const __m128i*)data_scalar);
+		/* Next three intrinsics calculate tempB as abs of tempA */
+		bitmask = _mm_srai_epi32(tempA, 31);
+		tempB   = _mm_xor_si128(tempA, bitmask);
+		tempB   = _mm_sub_epi32(tempB, bitmask);
+		total_err0 = _mm_add_epi32(total_err0,tempB);
+		tempB = _mm_sub_epi32(tempA,prev_err0);
+		prev_err0 = tempA;
+		/* Next three intrinsics calculate tempA as abs of tempB */
+		bitmask = _mm_srai_epi32(tempB, 31);
+		tempA   = _mm_xor_si128(tempB, bitmask);
+		tempA   = _mm_sub_epi32(tempA, bitmask);
+		total_err1 = _mm_add_epi32(total_err1,tempA);
+		tempA = _mm_sub_epi32(tempB,prev_err1);
+		prev_err1 = tempB;
+		/* Next three intrinsics calculate tempB as abs of tempA */
+		bitmask = _mm_srai_epi32(tempA, 31);
+		tempB   = _mm_xor_si128(tempA, bitmask);
+		tempB   = _mm_sub_epi32(tempB, bitmask);
+		total_err2 = _mm_add_epi32(total_err2,tempB);
+		tempB = _mm_sub_epi32(tempA,prev_err2);
+		prev_err2 = tempA;
+		/* Next three intrinsics calculate tempA as abs of tempB */
+		bitmask = _mm_srai_epi32(tempB, 31);
+		tempA   = _mm_xor_si128(tempB, bitmask);
+		tempA   = _mm_sub_epi32(tempA, bitmask);
+		total_err3 = _mm_add_epi32(total_err3,tempA);
+		tempA = _mm_sub_epi32(tempB,prev_err3);
+		prev_err3 = tempB;
+		/* Next three intrinsics calculate tempB as abs of tempA */
+		bitmask = _mm_srai_epi32(tempA, 31);
+		tempB   = _mm_xor_si128(tempA, bitmask);
+		tempB   = _mm_sub_epi32(tempB, bitmask);
+		total_err4 = _mm_add_epi32(total_err4,tempB);
+	}
+	_mm_storeu_si128((__m128i*)data_scalar,total_err0);
+	total_error_0 = data_scalar[0] + data_scalar[1] + data_scalar[2] + data_scalar[3];
+	_mm_storeu_si128((__m128i*)data_scalar,total_err1);
+	total_error_1 = data_scalar[0] + data_scalar[1] + data_scalar[2] + data_scalar[3];
+	_mm_storeu_si128((__m128i*)data_scalar,total_err2);
+	total_error_2 = data_scalar[0] + data_scalar[1] + data_scalar[2] + data_scalar[3];
+	_mm_storeu_si128((__m128i*)data_scalar,total_err3);
+	total_error_3 = data_scalar[0] + data_scalar[1] + data_scalar[2] + data_scalar[3];
+	_mm_storeu_si128((__m128i*)data_scalar,total_err4);
+	total_error_4 = data_scalar[0] + data_scalar[1] + data_scalar[2] + data_scalar[3];
 
-	{
-		FLAC__int32 itmp;
-		__m128i last_error;
-
-		last_error = _mm_cvtsi32_si128(data[-1]);							// 0   0   0   le0
-		itmp = data[-2];
-		last_error = _mm_shuffle_epi32(last_error, _MM_SHUFFLE(2,1,0,0));
-		last_error = _mm_sub_epi32(last_error, _mm_cvtsi32_si128(itmp));	// 0   0   le0 le1
-		itmp -= data[-3];
-		last_error = _mm_shuffle_epi32(last_error, _MM_SHUFFLE(2,1,0,0));
-		last_error = _mm_sub_epi32(last_error, _mm_cvtsi32_si128(itmp));	// 0   le0 le1 le2
-		itmp -= data[-3] - data[-4];
-		last_error = _mm_shuffle_epi32(last_error, _MM_SHUFFLE(2,1,0,0));
-		last_error = _mm_sub_epi32(last_error, _mm_cvtsi32_si128(itmp));	// le0 le1 le2 le3
-
-		total_err0 = total_err1 = _mm_setzero_si128();
-		for(i = 0; i < data_len; i++) {
-			__m128i err0, err1, tmp;
-			err0 = _mm_cvtsi32_si128(data[i]);								// 0   0   0   e0
-			err1 = _mm_shuffle_epi32(err0, _MM_SHUFFLE(0,0,0,0));			// e0  e0  e0  e0
-#if 1 /* OPT_SSE */
-			err1 = _mm_sub_epi32(err1, last_error);
-			last_error = _mm_srli_si128(last_error, 4);						// 0   le0 le1 le2
-			err1 = _mm_sub_epi32(err1, last_error);
-			last_error = _mm_srli_si128(last_error, 4);						// 0   0   le0 le1
-			err1 = _mm_sub_epi32(err1, last_error);
-			last_error = _mm_srli_si128(last_error, 4);						// 0   0   0   le0
-			err1 = _mm_sub_epi32(err1, last_error);							// e1  e2  e3  e4
-#else
-			last_error = _mm_add_epi32(last_error, _mm_srli_si128(last_error, 8));	// le0  le1  le2+le0  le3+le1
-			last_error = _mm_add_epi32(last_error, _mm_srli_si128(last_error, 4));	// le0  le1+le0  le2+le0+le1  le3+le1+le2+le0
-			err1 = _mm_sub_epi32(err1, last_error);							// e1  e2  e3  e4
-#endif
-			tmp = _mm_slli_si128(err0, 12);									// e0   0   0   0
-			last_error = _mm_srli_si128(err1, 4);							//  0  e1  e2  e3
-			last_error = _mm_or_si128(last_error, tmp);						// e0  e1  e2  e3
-
-			tmp = _mm_srai_epi32(err0, 31);
-			err0 = _mm_xor_si128(err0, tmp);
-			err0 = _mm_sub_epi32(err0, tmp);
-			tmp = _mm_srai_epi32(err1, 31);
-			err1 = _mm_xor_si128(err1, tmp);
-			err1 = _mm_sub_epi32(err1, tmp);
-
-			total_err0 = _mm_add_epi32(total_err0, err0);					// 0   0   0   te0
-			total_err1 = _mm_add_epi32(total_err1, err1);					// te1 te2 te3 te4
+	/* Now the remainder of samples needs to be processed */
+	i *= 4;
+	if(data_len % 4 > 0){
+		FLAC__int32 last_error_0 = data[i-1];
+		FLAC__int32 last_error_1 = data[i-1] - data[i-2];
+		FLAC__int32 last_error_2 = last_error_1 - (data[i-2] - data[i-3]);
+		FLAC__int32 last_error_3 = last_error_2 - (data[i-2] - 2*data[i-3] + data[i-4]);
+		FLAC__int32 error, save;
+		for(; i < data_len_int; i++) {
+			error  = data[i]     ; total_error_0 += local_abs(error);                      save = error;
+			error -= last_error_0; total_error_1 += local_abs(error); last_error_0 = save; save = error;
+			error -= last_error_1; total_error_2 += local_abs(error); last_error_1 = save; save = error;
+			error -= last_error_2; total_error_3 += local_abs(error); last_error_2 = save; save = error;
+			error -= last_error_3; total_error_4 += local_abs(error); last_error_3 = save;
 		}
 	}
-	
-	total_error_0 = _mm_cvtsi128_si32(total_err0);
-	total_err2 = total_err1;											// te1  te2  te3  te4
-	total_err1 = _mm_srli_si128(total_err1, 8);							//  0    0   te1  te2
-	total_error_4 = _mm_cvtsi128_si32(total_err2);
-	total_error_2 = _mm_cvtsi128_si32(total_err1);
-	total_err2 = _mm_srli_si128(total_err2,	4);							//  0   te1  te2  te3
-	total_err1 = _mm_srli_si128(total_err1, 4);							//  0    0    0   te1
-	total_error_3 = _mm_cvtsi128_si32(total_err2);
-	total_error_1 = _mm_cvtsi128_si32(total_err1);
 
-	/* prefer higher order */
-	if(total_error_0 < flac_min(flac_min(flac_min(total_error_1, total_error_2), total_error_3), total_error_4))
+	/* prefer lower order */
+	if(total_error_0 <= flac_min(flac_min(flac_min(total_error_1, total_error_2), total_error_3), total_error_4))
 		order = 0;
-	else if(total_error_1 < flac_min(flac_min(total_error_2, total_error_3), total_error_4))
+	else if(total_error_1 <= flac_min(flac_min(total_error_2, total_error_3), total_error_4))
 		order = 1;
-	else if(total_error_2 < flac_min(total_error_3, total_error_4))
+	else if(total_error_2 <= flac_min(total_error_3, total_error_4))
 		order = 2;
-	else if(total_error_3 < total_error_4)
+	else if(total_error_3 <= total_error_4)
 		order = 3;
 	else
 		order = 4;
@@ -140,27 +179,27 @@
 	FLAC__ASSERT(data_len > 0 || total_error_3 == 0);
 	FLAC__ASSERT(data_len > 0 || total_error_4 == 0);
 
-	residual_bits_per_sample[0] = (FLAC__float)((total_error_0 > 0) ? log(M_LN2 * (FLAC__double)total_error_0 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	residual_bits_per_sample[1] = (FLAC__float)((total_error_1 > 0) ? log(M_LN2 * (FLAC__double)total_error_1 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	residual_bits_per_sample[2] = (FLAC__float)((total_error_2 > 0) ? log(M_LN2 * (FLAC__double)total_error_2 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	residual_bits_per_sample[3] = (FLAC__float)((total_error_3 > 0) ? log(M_LN2 * (FLAC__double)total_error_3 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	residual_bits_per_sample[4] = (FLAC__float)((total_error_4 > 0) ? log(M_LN2 * (FLAC__double)total_error_4 / (FLAC__double)data_len) / M_LN2 : 0.0);
+	residual_bits_per_sample[0] = (float)((total_error_0 > 0) ? log(M_LN2 * (double)total_error_0 / (double)data_len) / M_LN2 : 0.0);
+	residual_bits_per_sample[1] = (float)((total_error_1 > 0) ? log(M_LN2 * (double)total_error_1 / (double)data_len) / M_LN2 : 0.0);
+	residual_bits_per_sample[2] = (float)((total_error_2 > 0) ? log(M_LN2 * (double)total_error_2 / (double)data_len) / M_LN2 : 0.0);
+	residual_bits_per_sample[3] = (float)((total_error_3 > 0) ? log(M_LN2 * (double)total_error_3 / (double)data_len) / M_LN2 : 0.0);
+	residual_bits_per_sample[4] = (float)((total_error_4 > 0) ? log(M_LN2 * (double)total_error_4 / (double)data_len) / M_LN2 : 0.0);
 
 	return order;
 }
 
 FLAC__SSE_TARGET("sse2")
-unsigned FLAC__fixed_compute_best_predictor_wide_intrin_sse2(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1])
+uint32_t FLAC__fixed_compute_best_predictor_wide_intrin_sse2(const FLAC__int32 data[], uint32_t data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1])
 {
 	FLAC__uint64 total_error_0, total_error_1, total_error_2, total_error_3, total_error_4;
-	unsigned i, order;
+	uint32_t i, order;
 
 	__m128i total_err0, total_err1, total_err3;
 
 	{
 		FLAC__int32 itmp;
 		__m128i last_error, zero = _mm_setzero_si128();
-		
+
 		last_error = _mm_cvtsi32_si128(data[-1]);							// 0   0   0   le0
 		itmp = data[-2];
 		last_error = _mm_shuffle_epi32(last_error, _MM_SHUFFLE(2,1,0,0));
@@ -208,7 +247,7 @@
 			total_err1 = _mm_add_epi64(total_err1, err1);					//       te1      te2
 		}
 	}
-	
+
 	m128i_to_i64(total_error_0, total_err0);
 	m128i_to_i64(total_error_4, total_err3);
 	m128i_to_i64(total_error_2, total_err1);
@@ -217,14 +256,14 @@
 	m128i_to_i64(total_error_3, total_err3);
 	m128i_to_i64(total_error_1, total_err1);
 
-	/* prefer higher order */
-	if(total_error_0 < flac_min(flac_min(flac_min(total_error_1, total_error_2), total_error_3), total_error_4))
+	/* prefer lower order */
+	if(total_error_0 <= flac_min(flac_min(flac_min(total_error_1, total_error_2), total_error_3), total_error_4))
 		order = 0;
-	else if(total_error_1 < flac_min(flac_min(total_error_2, total_error_3), total_error_4))
+	else if(total_error_1 <= flac_min(flac_min(total_error_2, total_error_3), total_error_4))
 		order = 1;
-	else if(total_error_2 < flac_min(total_error_3, total_error_4))
+	else if(total_error_2 <= flac_min(total_error_3, total_error_4))
 		order = 2;
-	else if(total_error_3 < total_error_4)
+	else if(total_error_3 <= total_error_4)
 		order = 3;
 	else
 		order = 4;
@@ -238,11 +277,11 @@
 	FLAC__ASSERT(data_len > 0 || total_error_3 == 0);
 	FLAC__ASSERT(data_len > 0 || total_error_4 == 0);
 
-	residual_bits_per_sample[0] = (FLAC__float)((total_error_0 > 0) ? log(M_LN2 * (FLAC__double)total_error_0 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	residual_bits_per_sample[1] = (FLAC__float)((total_error_1 > 0) ? log(M_LN2 * (FLAC__double)total_error_1 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	residual_bits_per_sample[2] = (FLAC__float)((total_error_2 > 0) ? log(M_LN2 * (FLAC__double)total_error_2 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	residual_bits_per_sample[3] = (FLAC__float)((total_error_3 > 0) ? log(M_LN2 * (FLAC__double)total_error_3 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	residual_bits_per_sample[4] = (FLAC__float)((total_error_4 > 0) ? log(M_LN2 * (FLAC__double)total_error_4 / (FLAC__double)data_len) / M_LN2 : 0.0);
+	residual_bits_per_sample[0] = (float)((total_error_0 > 0) ? log(M_LN2 * (double)total_error_0 / (double)data_len) / M_LN2 : 0.0);
+	residual_bits_per_sample[1] = (float)((total_error_1 > 0) ? log(M_LN2 * (double)total_error_1 / (double)data_len) / M_LN2 : 0.0);
+	residual_bits_per_sample[2] = (float)((total_error_2 > 0) ? log(M_LN2 * (double)total_error_2 / (double)data_len) / M_LN2 : 0.0);
+	residual_bits_per_sample[3] = (float)((total_error_3 > 0) ? log(M_LN2 * (double)total_error_3 / (double)data_len) / M_LN2 : 0.0);
+	residual_bits_per_sample[4] = (float)((total_error_4 > 0) ? log(M_LN2 * (double)total_error_4 / (double)data_len) / M_LN2 : 0.0);
 
 	return order;
 }

diff --git a/src/libFLAC/fixed_intrin_ssse3.c b/src/libFLAC/fixed_intrin_ssse3.c
index 50c663d..bbad224 100644
--- a/src/libFLAC/fixed_intrin_ssse3.c
+++ b/src/libFLAC/fixed_intrin_ssse3.c

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -34,9 +34,11 @@
 #  include <config.h>
 #endif
 
+#include "private/cpu.h"
+
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
 #ifndef FLAC__NO_ASM
-#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
+#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN
 #include "private/fixed.h"
 #ifdef FLAC__SSSE3_SUPPORTED
 
@@ -52,75 +54,103 @@
 #define m128i_to_i64(dest, src) dest = _mm_cvtsi128_si64(src)
 #endif
 
+#ifdef local_abs
+#undef local_abs
+#endif
+#define local_abs(x) ((uint32_t)((x)<0? -(x) : (x)))
+
 FLAC__SSE_TARGET("ssse3")
-unsigned FLAC__fixed_compute_best_predictor_intrin_ssse3(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1])
+uint32_t FLAC__fixed_compute_best_predictor_intrin_ssse3(const FLAC__int32 data[], uint32_t data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1])
 {
 	FLAC__uint32 total_error_0, total_error_1, total_error_2, total_error_3, total_error_4;
-	unsigned i, order;
+	FLAC__int32 i, data_len_int;
+	uint32_t order;
+	__m128i total_err0, total_err1, total_err2, total_err3, total_err4;
+	__m128i prev_err0,  prev_err1,  prev_err2,  prev_err3;
+	__m128i tempA, tempB;
+	FLAC__int32 data_scalar[4];
+	FLAC__int32 prev_err0_scalar[4];
+	FLAC__int32 prev_err1_scalar[4];
+	FLAC__int32 prev_err2_scalar[4];
+	FLAC__int32 prev_err3_scalar[4];
+	total_err0 = _mm_setzero_si128();
+	total_err1 = _mm_setzero_si128();
+	total_err2 = _mm_setzero_si128();
+	total_err3 = _mm_setzero_si128();
+	total_err4 = _mm_setzero_si128();
+	data_len_int = data_len;
 
-	__m128i total_err0, total_err1, total_err2;
+	for(i = 0; i < 4; i++){
+		prev_err0_scalar[i] = data[-1+i*(data_len_int/4)];
+		prev_err1_scalar[i] = data[-1+i*(data_len_int/4)] - data[-2+i*(data_len_int/4)];
+		prev_err2_scalar[i] = prev_err1_scalar[i] - (data[-2+i*(data_len_int/4)] - data[-3+i*(data_len_int/4)]);
+		prev_err3_scalar[i] = prev_err2_scalar[i] - (data[-2+i*(data_len_int/4)] - 2*data[-3+i*(data_len_int/4)] + data[-4+i*(data_len_int/4)]);
+	}
+	prev_err0 = _mm_loadu_si128((const __m128i*)prev_err0_scalar);
+	prev_err1 = _mm_loadu_si128((const __m128i*)prev_err1_scalar);
+	prev_err2 = _mm_loadu_si128((const __m128i*)prev_err2_scalar);
+	prev_err3 = _mm_loadu_si128((const __m128i*)prev_err3_scalar);
+	for(i = 0; i < data_len_int / 4; i++){
+		data_scalar[0] = data[i];
+		data_scalar[1] = data[i+data_len/4];
+		data_scalar[2] = data[i+2*(data_len/4)];
+		data_scalar[3] = data[i+3*(data_len/4)];
+		tempA = _mm_loadu_si128((const __m128i*)data_scalar);
+		tempB = _mm_abs_epi32(tempA);
+		total_err0 = _mm_add_epi32(total_err0,tempB);
+		tempB = _mm_sub_epi32(tempA,prev_err0);
+		prev_err0 = tempA;
+		tempA = _mm_abs_epi32(tempB);
+		total_err1 = _mm_add_epi32(total_err1,tempA);
+		tempA = _mm_sub_epi32(tempB,prev_err1);
+		prev_err1 = tempB;
+		tempB = _mm_abs_epi32(tempA);
+		total_err2 = _mm_add_epi32(total_err2,tempB);
+		tempB = _mm_sub_epi32(tempA,prev_err2);
+		prev_err2 = tempA;
+		tempA = _mm_abs_epi32(tempB);
+		total_err3 = _mm_add_epi32(total_err3,tempA);
+		tempA = _mm_sub_epi32(tempB,prev_err3);
+		prev_err3 = tempB;
+		tempB = _mm_abs_epi32(tempA);
+		total_err4 = _mm_add_epi32(total_err4,tempB);
+	}
+	_mm_storeu_si128((__m128i*)data_scalar,total_err0);
+	total_error_0 = data_scalar[0] + data_scalar[1] + data_scalar[2] + data_scalar[3];
+	_mm_storeu_si128((__m128i*)data_scalar,total_err1);
+	total_error_1 = data_scalar[0] + data_scalar[1] + data_scalar[2] + data_scalar[3];
+	_mm_storeu_si128((__m128i*)data_scalar,total_err2);
+	total_error_2 = data_scalar[0] + data_scalar[1] + data_scalar[2] + data_scalar[3];
+	_mm_storeu_si128((__m128i*)data_scalar,total_err3);
+	total_error_3 = data_scalar[0] + data_scalar[1] + data_scalar[2] + data_scalar[3];
+	_mm_storeu_si128((__m128i*)data_scalar,total_err4);
+	total_error_4 = data_scalar[0] + data_scalar[1] + data_scalar[2] + data_scalar[3];
 
-	{
-		FLAC__int32 itmp;
-		__m128i last_error;
-
-		last_error = _mm_cvtsi32_si128(data[-1]);							// 0   0   0   le0
-		itmp = data[-2];
-		last_error = _mm_shuffle_epi32(last_error, _MM_SHUFFLE(2,1,0,0));
-		last_error = _mm_sub_epi32(last_error, _mm_cvtsi32_si128(itmp));	// 0   0   le0 le1
-		itmp -= data[-3];
-		last_error = _mm_shuffle_epi32(last_error, _MM_SHUFFLE(2,1,0,0));
-		last_error = _mm_sub_epi32(last_error, _mm_cvtsi32_si128(itmp));	// 0   le0 le1 le2
-		itmp -= data[-3] - data[-4];
-		last_error = _mm_shuffle_epi32(last_error, _MM_SHUFFLE(2,1,0,0));
-		last_error = _mm_sub_epi32(last_error, _mm_cvtsi32_si128(itmp));	// le0 le1 le2 le3
-
-		total_err0 = total_err1 = _mm_setzero_si128();
-		for(i = 0; i < data_len; i++) {
-			__m128i err0, err1;
-			err0 = _mm_cvtsi32_si128(data[i]);								// 0   0   0   e0
-			err1 = _mm_shuffle_epi32(err0, _MM_SHUFFLE(0,0,0,0));			// e0  e0  e0  e0
-#if 1 /* OPT_SSE */
-			err1 = _mm_sub_epi32(err1, last_error);
-			last_error = _mm_srli_si128(last_error, 4);						// 0   le0 le1 le2
-			err1 = _mm_sub_epi32(err1, last_error);
-			last_error = _mm_srli_si128(last_error, 4);						// 0   0   le0 le1
-			err1 = _mm_sub_epi32(err1, last_error);
-			last_error = _mm_srli_si128(last_error, 4);						// 0   0   0   le0
-			err1 = _mm_sub_epi32(err1, last_error);							// e1  e2  e3  e4
-#else
-			last_error = _mm_add_epi32(last_error, _mm_srli_si128(last_error, 8));	// le0  le1  le2+le0  le3+le1
-			last_error = _mm_add_epi32(last_error, _mm_srli_si128(last_error, 4));	// le0  le1+le0  le2+le0+le1  le3+le1+le2+le0
-			err1 = _mm_sub_epi32(err1, last_error);							// e1  e2  e3  e4
-#endif
-			last_error = _mm_alignr_epi8(err0, err1, 4);					// e0  e1  e2  e3
-
-			err0 = _mm_abs_epi32(err0);
-			err1 = _mm_abs_epi32(err1);
-
-			total_err0 = _mm_add_epi32(total_err0, err0);					// 0   0   0   te0
-			total_err1 = _mm_add_epi32(total_err1, err1);					// te1 te2 te3 te4
+	/* Now the remainder of samples needs to be processed */
+	i *= 4;
+	if(data_len % 4 > 0){
+		FLAC__int32 last_error_0 = data[i-1];
+		FLAC__int32 last_error_1 = data[i-1] - data[i-2];
+		FLAC__int32 last_error_2 = last_error_1 - (data[i-2] - data[i-3]);
+		FLAC__int32 last_error_3 = last_error_2 - (data[i-2] - 2*data[i-3] + data[i-4]);
+		FLAC__int32 error, save;
+		for(; i < data_len_int; i++) {
+			error  = data[i]     ; total_error_0 += local_abs(error);                      save = error;
+			error -= last_error_0; total_error_1 += local_abs(error); last_error_0 = save; save = error;
+			error -= last_error_1; total_error_2 += local_abs(error); last_error_1 = save; save = error;
+			error -= last_error_2; total_error_3 += local_abs(error); last_error_2 = save; save = error;
+			error -= last_error_3; total_error_4 += local_abs(error); last_error_3 = save;
 		}
 	}
-	
-	total_error_0 = _mm_cvtsi128_si32(total_err0);
-	total_err2 = total_err1;											// te1  te2  te3  te4
-	total_err1 = _mm_srli_si128(total_err1, 8);							//  0    0   te1  te2
-	total_error_4 = _mm_cvtsi128_si32(total_err2);
-	total_error_2 = _mm_cvtsi128_si32(total_err1);
-	total_err2 = _mm_srli_si128(total_err2,	4);							//  0   te1  te2  te3
-	total_err1 = _mm_srli_si128(total_err1, 4);							//  0    0    0   te1
-	total_error_3 = _mm_cvtsi128_si32(total_err2);
-	total_error_1 = _mm_cvtsi128_si32(total_err1);
 
-	/* prefer higher order */
-	if(total_error_0 < flac_min(flac_min(flac_min(total_error_1, total_error_2), total_error_3), total_error_4))
+	/* prefer lower order */
+	if(total_error_0 <= flac_min(flac_min(flac_min(total_error_1, total_error_2), total_error_3), total_error_4))
 		order = 0;
-	else if(total_error_1 < flac_min(flac_min(total_error_2, total_error_3), total_error_4))
+	else if(total_error_1 <= flac_min(flac_min(total_error_2, total_error_3), total_error_4))
 		order = 1;
-	else if(total_error_2 < flac_min(total_error_3, total_error_4))
+	else if(total_error_2 <= flac_min(total_error_3, total_error_4))
 		order = 2;
-	else if(total_error_3 < total_error_4)
+	else if(total_error_3 <= total_error_4)
 		order = 3;
 	else
 		order = 4;
@@ -134,27 +164,27 @@
 	FLAC__ASSERT(data_len > 0 || total_error_3 == 0);
 	FLAC__ASSERT(data_len > 0 || total_error_4 == 0);
 
-	residual_bits_per_sample[0] = (FLAC__float)((total_error_0 > 0) ? log(M_LN2 * (FLAC__double)total_error_0 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	residual_bits_per_sample[1] = (FLAC__float)((total_error_1 > 0) ? log(M_LN2 * (FLAC__double)total_error_1 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	residual_bits_per_sample[2] = (FLAC__float)((total_error_2 > 0) ? log(M_LN2 * (FLAC__double)total_error_2 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	residual_bits_per_sample[3] = (FLAC__float)((total_error_3 > 0) ? log(M_LN2 * (FLAC__double)total_error_3 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	residual_bits_per_sample[4] = (FLAC__float)((total_error_4 > 0) ? log(M_LN2 * (FLAC__double)total_error_4 / (FLAC__double)data_len) / M_LN2 : 0.0);
+	residual_bits_per_sample[0] = (float)((total_error_0 > 0) ? log(M_LN2 * (double)total_error_0 / (double)data_len) / M_LN2 : 0.0);
+	residual_bits_per_sample[1] = (float)((total_error_1 > 0) ? log(M_LN2 * (double)total_error_1 / (double)data_len) / M_LN2 : 0.0);
+	residual_bits_per_sample[2] = (float)((total_error_2 > 0) ? log(M_LN2 * (double)total_error_2 / (double)data_len) / M_LN2 : 0.0);
+	residual_bits_per_sample[3] = (float)((total_error_3 > 0) ? log(M_LN2 * (double)total_error_3 / (double)data_len) / M_LN2 : 0.0);
+	residual_bits_per_sample[4] = (float)((total_error_4 > 0) ? log(M_LN2 * (double)total_error_4 / (double)data_len) / M_LN2 : 0.0);
 
 	return order;
 }
 
 FLAC__SSE_TARGET("ssse3")
-unsigned FLAC__fixed_compute_best_predictor_wide_intrin_ssse3(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1])
+uint32_t FLAC__fixed_compute_best_predictor_wide_intrin_ssse3(const FLAC__int32 data[], uint32_t data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1])
 {
 	FLAC__uint64 total_error_0, total_error_1, total_error_2, total_error_3, total_error_4;
-	unsigned i, order;
+	uint32_t i, order;
 
 	__m128i total_err0, total_err1, total_err3;
 
 	{
 		FLAC__int32 itmp;
 		__m128i last_error, zero = _mm_setzero_si128();
-		
+
 		last_error = _mm_cvtsi32_si128(data[-1]);							// 0   0   0   le0
 		itmp = data[-2];
 		last_error = _mm_shuffle_epi32(last_error, _MM_SHUFFLE(2,1,0,0));
@@ -185,7 +215,7 @@
 			err1 = _mm_sub_epi32(err1, last_error);							// e1  e2  e3  e4
 #endif
 			last_error = _mm_alignr_epi8(err0, err1, 4);					// e0  e1  e2  e3
-			
+
 			err0 = _mm_abs_epi32(err0);
 			err1 = _mm_abs_epi32(err1);										// |e1| |e2| |e3| |e4|
 
@@ -196,7 +226,7 @@
 			total_err1 = _mm_add_epi64(total_err1, err1);					//       te1      te2
 		}
 	}
-	
+
 	m128i_to_i64(total_error_0, total_err0);
 	m128i_to_i64(total_error_4, total_err3);
 	m128i_to_i64(total_error_2, total_err1);
@@ -205,14 +235,14 @@
 	m128i_to_i64(total_error_3, total_err3);
 	m128i_to_i64(total_error_1, total_err1);
 
-	/* prefer higher order */
-	if(total_error_0 < flac_min(flac_min(flac_min(total_error_1, total_error_2), total_error_3), total_error_4))
+	/* prefer lower order */
+	if(total_error_0 <= flac_min(flac_min(flac_min(total_error_1, total_error_2), total_error_3), total_error_4))
 		order = 0;
-	else if(total_error_1 < flac_min(flac_min(total_error_2, total_error_3), total_error_4))
+	else if(total_error_1 <= flac_min(flac_min(total_error_2, total_error_3), total_error_4))
 		order = 1;
-	else if(total_error_2 < flac_min(total_error_3, total_error_4))
+	else if(total_error_2 <= flac_min(total_error_3, total_error_4))
 		order = 2;
-	else if(total_error_3 < total_error_4)
+	else if(total_error_3 <= total_error_4)
 		order = 3;
 	else
 		order = 4;
@@ -226,11 +256,11 @@
 	FLAC__ASSERT(data_len > 0 || total_error_3 == 0);
 	FLAC__ASSERT(data_len > 0 || total_error_4 == 0);
 
-	residual_bits_per_sample[0] = (FLAC__float)((total_error_0 > 0) ? log(M_LN2 * (FLAC__double)total_error_0 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	residual_bits_per_sample[1] = (FLAC__float)((total_error_1 > 0) ? log(M_LN2 * (FLAC__double)total_error_1 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	residual_bits_per_sample[2] = (FLAC__float)((total_error_2 > 0) ? log(M_LN2 * (FLAC__double)total_error_2 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	residual_bits_per_sample[3] = (FLAC__float)((total_error_3 > 0) ? log(M_LN2 * (FLAC__double)total_error_3 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	residual_bits_per_sample[4] = (FLAC__float)((total_error_4 > 0) ? log(M_LN2 * (FLAC__double)total_error_4 / (FLAC__double)data_len) / M_LN2 : 0.0);
+	residual_bits_per_sample[0] = (float)((total_error_0 > 0) ? log(M_LN2 * (double)total_error_0 / (double)data_len) / M_LN2 : 0.0);
+	residual_bits_per_sample[1] = (float)((total_error_1 > 0) ? log(M_LN2 * (double)total_error_1 / (double)data_len) / M_LN2 : 0.0);
+	residual_bits_per_sample[2] = (float)((total_error_2 > 0) ? log(M_LN2 * (double)total_error_2 / (double)data_len) / M_LN2 : 0.0);
+	residual_bits_per_sample[3] = (float)((total_error_3 > 0) ? log(M_LN2 * (double)total_error_3 / (double)data_len) / M_LN2 : 0.0);
+	residual_bits_per_sample[4] = (float)((total_error_4 > 0) ? log(M_LN2 * (double)total_error_4 / (double)data_len) / M_LN2 : 0.0);
 
 	return order;
 }

diff --git a/src/libFLAC/flac.pc.in b/src/libFLAC/flac.pc.in
new file mode 100644
index 0000000..56e8594
--- /dev/null
+++ b/src/libFLAC/flac.pc.in

@@ -0,0 +1,12 @@
+prefix=@prefix@
+exec_prefix=@exec_prefix@
+libdir=@libdir@
+includedir=@includedir@
+
+Name: FLAC
+Description: Free Lossless Audio Codec Library
+Version: @VERSION@
+Requires.private: @OGG_PACKAGE@
+Libs: -L${libdir} -lFLAC
+Libs.private: -lm
+Cflags: -I${includedir}

diff --git a/src/libFLAC/float.c b/src/libFLAC/float.c
index 068069f..1e25827 100644
--- a/src/libFLAC/float.c
+++ b/src/libFLAC/float.c

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2004-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -266,7 +266,7 @@
 };
 #endif
 
-FLAC__uint32 FLAC__fixedpoint_log2(FLAC__uint32 x, unsigned fracbits, unsigned precision)
+FLAC__uint32 FLAC__fixedpoint_log2(FLAC__uint32 x, uint32_t fracbits, uint32_t precision)
 {
 	const FLAC__uint32 ONE = (1u << fracbits);
 	const FLAC__uint32 *table = log2_lookup[fracbits >> 2];

diff --git a/src/libFLAC/format.c b/src/libFLAC/format.c
index 4d0d832..9db749c 100644
--- a/src/libFLAC/format.c
+++ b/src/libFLAC/format.c

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -39,109 +39,119 @@
 #include <string.h> /* for memset() */
 #include "FLAC/assert.h"
 #include "FLAC/format.h"
+#include "share/alloc.h"
 #include "share/compat.h"
 #include "private/format.h"
 #include "private/macros.h"
 
-/* VERSION should come from configure */
-FLAC_API const char *FLAC__VERSION_STRING = VERSION;
-
-FLAC_API const char *FLAC__VENDOR_STRING = "reference libFLAC " VERSION " 20141125";
+#if (defined GIT_COMMIT_HASH && defined GIT_COMMIT_DATE)
+# ifdef GIT_COMMIT_TAG
+FLAC_API const char *FLAC__VERSION_STRING = GIT_COMMIT_TAG;
+FLAC_API const char *FLAC__VENDOR_STRING = "reference libFLAC " GIT_COMMIT_TAG " " GIT_COMMIT_DATE;
+# else
+FLAC_API const char *FLAC__VERSION_STRING = "git-" GIT_COMMIT_HASH " " GIT_COMMIT_DATE;
+FLAC_API const char *FLAC__VENDOR_STRING = "reference libFLAC git-" GIT_COMMIT_HASH " " GIT_COMMIT_DATE;
+# endif
+#else
+/* PACKAGE_VERSION should come from configure */
+FLAC_API const char *FLAC__VERSION_STRING = PACKAGE_VERSION;
+FLAC_API const char *FLAC__VENDOR_STRING = "reference libFLAC " PACKAGE_VERSION " 20221022";
+#endif
 
 FLAC_API const FLAC__byte FLAC__STREAM_SYNC_STRING[4] = { 'f','L','a','C' };
-FLAC_API const unsigned FLAC__STREAM_SYNC = 0x664C6143;
-FLAC_API const unsigned FLAC__STREAM_SYNC_LEN = 32; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_SYNC = 0x664C6143;
+FLAC_API const uint32_t FLAC__STREAM_SYNC_LEN = 32; /* bits */
 
-FLAC_API const unsigned FLAC__STREAM_METADATA_STREAMINFO_MIN_BLOCK_SIZE_LEN = 16; /* bits */
-FLAC_API const unsigned FLAC__STREAM_METADATA_STREAMINFO_MAX_BLOCK_SIZE_LEN = 16; /* bits */
-FLAC_API const unsigned FLAC__STREAM_METADATA_STREAMINFO_MIN_FRAME_SIZE_LEN = 24; /* bits */
-FLAC_API const unsigned FLAC__STREAM_METADATA_STREAMINFO_MAX_FRAME_SIZE_LEN = 24; /* bits */
-FLAC_API const unsigned FLAC__STREAM_METADATA_STREAMINFO_SAMPLE_RATE_LEN = 20; /* bits */
-FLAC_API const unsigned FLAC__STREAM_METADATA_STREAMINFO_CHANNELS_LEN = 3; /* bits */
-FLAC_API const unsigned FLAC__STREAM_METADATA_STREAMINFO_BITS_PER_SAMPLE_LEN = 5; /* bits */
-FLAC_API const unsigned FLAC__STREAM_METADATA_STREAMINFO_TOTAL_SAMPLES_LEN = 36; /* bits */
-FLAC_API const unsigned FLAC__STREAM_METADATA_STREAMINFO_MD5SUM_LEN = 128; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_STREAMINFO_MIN_BLOCK_SIZE_LEN = 16; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_STREAMINFO_MAX_BLOCK_SIZE_LEN = 16; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_STREAMINFO_MIN_FRAME_SIZE_LEN = 24; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_STREAMINFO_MAX_FRAME_SIZE_LEN = 24; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_STREAMINFO_SAMPLE_RATE_LEN = 20; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_STREAMINFO_CHANNELS_LEN = 3; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_STREAMINFO_BITS_PER_SAMPLE_LEN = 5; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_STREAMINFO_TOTAL_SAMPLES_LEN = 36; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_STREAMINFO_MD5SUM_LEN = 128; /* bits */
 
-FLAC_API const unsigned FLAC__STREAM_METADATA_APPLICATION_ID_LEN = 32; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_APPLICATION_ID_LEN = 32; /* bits */
 
-FLAC_API const unsigned FLAC__STREAM_METADATA_SEEKPOINT_SAMPLE_NUMBER_LEN = 64; /* bits */
-FLAC_API const unsigned FLAC__STREAM_METADATA_SEEKPOINT_STREAM_OFFSET_LEN = 64; /* bits */
-FLAC_API const unsigned FLAC__STREAM_METADATA_SEEKPOINT_FRAME_SAMPLES_LEN = 16; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_SEEKPOINT_SAMPLE_NUMBER_LEN = 64; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_SEEKPOINT_STREAM_OFFSET_LEN = 64; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_SEEKPOINT_FRAME_SAMPLES_LEN = 16; /* bits */
 
 FLAC_API const FLAC__uint64 FLAC__STREAM_METADATA_SEEKPOINT_PLACEHOLDER = FLAC__U64L(0xffffffffffffffff);
 
-FLAC_API const unsigned FLAC__STREAM_METADATA_VORBIS_COMMENT_ENTRY_LENGTH_LEN = 32; /* bits */
-FLAC_API const unsigned FLAC__STREAM_METADATA_VORBIS_COMMENT_NUM_COMMENTS_LEN = 32; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_VORBIS_COMMENT_ENTRY_LENGTH_LEN = 32; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_VORBIS_COMMENT_NUM_COMMENTS_LEN = 32; /* bits */
 
-FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_INDEX_OFFSET_LEN = 64; /* bits */
-FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_INDEX_NUMBER_LEN = 8; /* bits */
-FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_INDEX_RESERVED_LEN = 3*8; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_CUESHEET_INDEX_OFFSET_LEN = 64; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_CUESHEET_INDEX_NUMBER_LEN = 8; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_CUESHEET_INDEX_RESERVED_LEN = 3*8; /* bits */
 
-FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_TRACK_OFFSET_LEN = 64; /* bits */
-FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_TRACK_NUMBER_LEN = 8; /* bits */
-FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_TRACK_ISRC_LEN = 12*8; /* bits */
-FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_TRACK_TYPE_LEN = 1; /* bit */
-FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_TRACK_PRE_EMPHASIS_LEN = 1; /* bit */
-FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_TRACK_RESERVED_LEN = 6+13*8; /* bits */
-FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_TRACK_NUM_INDICES_LEN = 8; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_CUESHEET_TRACK_OFFSET_LEN = 64; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_CUESHEET_TRACK_NUMBER_LEN = 8; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_CUESHEET_TRACK_ISRC_LEN = 12*8; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_CUESHEET_TRACK_TYPE_LEN = 1; /* bit */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_CUESHEET_TRACK_PRE_EMPHASIS_LEN = 1; /* bit */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_CUESHEET_TRACK_RESERVED_LEN = 6+13*8; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_CUESHEET_TRACK_NUM_INDICES_LEN = 8; /* bits */
 
-FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_MEDIA_CATALOG_NUMBER_LEN = 128*8; /* bits */
-FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_LEAD_IN_LEN = 64; /* bits */
-FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_IS_CD_LEN = 1; /* bit */
-FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_RESERVED_LEN = 7+258*8; /* bits */
-FLAC_API const unsigned FLAC__STREAM_METADATA_CUESHEET_NUM_TRACKS_LEN = 8; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_CUESHEET_MEDIA_CATALOG_NUMBER_LEN = 128*8; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_CUESHEET_LEAD_IN_LEN = 64; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_CUESHEET_IS_CD_LEN = 1; /* bit */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_CUESHEET_RESERVED_LEN = 7+258*8; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_CUESHEET_NUM_TRACKS_LEN = 8; /* bits */
 
-FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_TYPE_LEN = 32; /* bits */
-FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_MIME_TYPE_LENGTH_LEN = 32; /* bits */
-FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_DESCRIPTION_LENGTH_LEN = 32; /* bits */
-FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_WIDTH_LEN = 32; /* bits */
-FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_HEIGHT_LEN = 32; /* bits */
-FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_DEPTH_LEN = 32; /* bits */
-FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_COLORS_LEN = 32; /* bits */
-FLAC_API const unsigned FLAC__STREAM_METADATA_PICTURE_DATA_LENGTH_LEN = 32; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_PICTURE_TYPE_LEN = 32; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_PICTURE_MIME_TYPE_LENGTH_LEN = 32; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_PICTURE_DESCRIPTION_LENGTH_LEN = 32; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_PICTURE_WIDTH_LEN = 32; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_PICTURE_HEIGHT_LEN = 32; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_PICTURE_DEPTH_LEN = 32; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_PICTURE_COLORS_LEN = 32; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_PICTURE_DATA_LENGTH_LEN = 32; /* bits */
 
-FLAC_API const unsigned FLAC__STREAM_METADATA_IS_LAST_LEN = 1; /* bits */
-FLAC_API const unsigned FLAC__STREAM_METADATA_TYPE_LEN = 7; /* bits */
-FLAC_API const unsigned FLAC__STREAM_METADATA_LENGTH_LEN = 24; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_IS_LAST_LEN = 1; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_TYPE_LEN = 7; /* bits */
+FLAC_API const uint32_t FLAC__STREAM_METADATA_LENGTH_LEN = 24; /* bits */
 
-FLAC_API const unsigned FLAC__FRAME_HEADER_SYNC = 0x3ffe;
-FLAC_API const unsigned FLAC__FRAME_HEADER_SYNC_LEN = 14; /* bits */
-FLAC_API const unsigned FLAC__FRAME_HEADER_RESERVED_LEN = 1; /* bits */
-FLAC_API const unsigned FLAC__FRAME_HEADER_BLOCKING_STRATEGY_LEN = 1; /* bits */
-FLAC_API const unsigned FLAC__FRAME_HEADER_BLOCK_SIZE_LEN = 4; /* bits */
-FLAC_API const unsigned FLAC__FRAME_HEADER_SAMPLE_RATE_LEN = 4; /* bits */
-FLAC_API const unsigned FLAC__FRAME_HEADER_CHANNEL_ASSIGNMENT_LEN = 4; /* bits */
-FLAC_API const unsigned FLAC__FRAME_HEADER_BITS_PER_SAMPLE_LEN = 3; /* bits */
-FLAC_API const unsigned FLAC__FRAME_HEADER_ZERO_PAD_LEN = 1; /* bits */
-FLAC_API const unsigned FLAC__FRAME_HEADER_CRC_LEN = 8; /* bits */
+FLAC_API const uint32_t FLAC__FRAME_HEADER_SYNC = 0x3ffe;
+FLAC_API const uint32_t FLAC__FRAME_HEADER_SYNC_LEN = 14; /* bits */
+FLAC_API const uint32_t FLAC__FRAME_HEADER_RESERVED_LEN = 1; /* bits */
+FLAC_API const uint32_t FLAC__FRAME_HEADER_BLOCKING_STRATEGY_LEN = 1; /* bits */
+FLAC_API const uint32_t FLAC__FRAME_HEADER_BLOCK_SIZE_LEN = 4; /* bits */
+FLAC_API const uint32_t FLAC__FRAME_HEADER_SAMPLE_RATE_LEN = 4; /* bits */
+FLAC_API const uint32_t FLAC__FRAME_HEADER_CHANNEL_ASSIGNMENT_LEN = 4; /* bits */
+FLAC_API const uint32_t FLAC__FRAME_HEADER_BITS_PER_SAMPLE_LEN = 3; /* bits */
+FLAC_API const uint32_t FLAC__FRAME_HEADER_ZERO_PAD_LEN = 1; /* bits */
+FLAC_API const uint32_t FLAC__FRAME_HEADER_CRC_LEN = 8; /* bits */
 
-FLAC_API const unsigned FLAC__FRAME_FOOTER_CRC_LEN = 16; /* bits */
+FLAC_API const uint32_t FLAC__FRAME_FOOTER_CRC_LEN = 16; /* bits */
 
-FLAC_API const unsigned FLAC__ENTROPY_CODING_METHOD_TYPE_LEN = 2; /* bits */
-FLAC_API const unsigned FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ORDER_LEN = 4; /* bits */
-FLAC_API const unsigned FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_PARAMETER_LEN = 4; /* bits */
-FLAC_API const unsigned FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_PARAMETER_LEN = 5; /* bits */
-FLAC_API const unsigned FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_RAW_LEN = 5; /* bits */
+FLAC_API const uint32_t FLAC__ENTROPY_CODING_METHOD_TYPE_LEN = 2; /* bits */
+FLAC_API const uint32_t FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ORDER_LEN = 4; /* bits */
+FLAC_API const uint32_t FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_PARAMETER_LEN = 4; /* bits */
+FLAC_API const uint32_t FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_PARAMETER_LEN = 5; /* bits */
+FLAC_API const uint32_t FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_RAW_LEN = 5; /* bits */
 
-FLAC_API const unsigned FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ESCAPE_PARAMETER = 15; /* == (1<<FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_PARAMETER_LEN)-1 */
-FLAC_API const unsigned FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_ESCAPE_PARAMETER = 31; /* == (1<<FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_PARAMETER_LEN)-1 */
+FLAC_API const uint32_t FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ESCAPE_PARAMETER = 15; /* == (1<<FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_PARAMETER_LEN)-1 */
+FLAC_API const uint32_t FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_ESCAPE_PARAMETER = 31; /* == (1<<FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_PARAMETER_LEN)-1 */
 
 FLAC_API const char * const FLAC__EntropyCodingMethodTypeString[] = {
 	"PARTITIONED_RICE",
 	"PARTITIONED_RICE2"
 };
 
-FLAC_API const unsigned FLAC__SUBFRAME_LPC_QLP_COEFF_PRECISION_LEN = 4; /* bits */
-FLAC_API const unsigned FLAC__SUBFRAME_LPC_QLP_SHIFT_LEN = 5; /* bits */
+FLAC_API const uint32_t FLAC__SUBFRAME_LPC_QLP_COEFF_PRECISION_LEN = 4; /* bits */
+FLAC_API const uint32_t FLAC__SUBFRAME_LPC_QLP_SHIFT_LEN = 5; /* bits */
 
-FLAC_API const unsigned FLAC__SUBFRAME_ZERO_PAD_LEN = 1; /* bits */
-FLAC_API const unsigned FLAC__SUBFRAME_TYPE_LEN = 6; /* bits */
-FLAC_API const unsigned FLAC__SUBFRAME_WASTED_BITS_FLAG_LEN = 1; /* bits */
+FLAC_API const uint32_t FLAC__SUBFRAME_ZERO_PAD_LEN = 1; /* bits */
+FLAC_API const uint32_t FLAC__SUBFRAME_TYPE_LEN = 6; /* bits */
+FLAC_API const uint32_t FLAC__SUBFRAME_WASTED_BITS_FLAG_LEN = 1; /* bits */
 
-FLAC_API const unsigned FLAC__SUBFRAME_TYPE_CONSTANT_BYTE_ALIGNED_MASK = 0x00;
-FLAC_API const unsigned FLAC__SUBFRAME_TYPE_VERBATIM_BYTE_ALIGNED_MASK = 0x02;
-FLAC_API const unsigned FLAC__SUBFRAME_TYPE_FIXED_BYTE_ALIGNED_MASK = 0x10;
-FLAC_API const unsigned FLAC__SUBFRAME_TYPE_LPC_BYTE_ALIGNED_MASK = 0x40;
+FLAC_API const uint32_t FLAC__SUBFRAME_TYPE_CONSTANT_BYTE_ALIGNED_MASK = 0x00;
+FLAC_API const uint32_t FLAC__SUBFRAME_TYPE_VERBATIM_BYTE_ALIGNED_MASK = 0x02;
+FLAC_API const uint32_t FLAC__SUBFRAME_TYPE_FIXED_BYTE_ALIGNED_MASK = 0x10;
+FLAC_API const uint32_t FLAC__SUBFRAME_TYPE_LPC_BYTE_ALIGNED_MASK = 0x40;
 
 FLAC_API const char * const FLAC__SubframeTypeString[] = {
 	"CONSTANT",
@@ -196,7 +206,7 @@
 	"Publisher/Studio logotype"
 };
 
-FLAC_API FLAC__bool FLAC__format_sample_rate_is_valid(unsigned sample_rate)
+FLAC_API FLAC__bool FLAC__format_sample_rate_is_valid(uint32_t sample_rate)
 {
 	if(sample_rate == 0 || sample_rate > FLAC__MAX_SAMPLE_RATE) {
 		return false;
@@ -205,7 +215,7 @@
 		return true;
 }
 
-FLAC_API FLAC__bool FLAC__format_blocksize_is_subset(unsigned blocksize, unsigned sample_rate)
+FLAC_API FLAC__bool FLAC__format_blocksize_is_subset(uint32_t blocksize, uint32_t sample_rate)
 {
 	if(blocksize > 16384)
 		return false;
@@ -215,14 +225,12 @@
 		return true;
 }
 
-FLAC_API FLAC__bool FLAC__format_sample_rate_is_subset(unsigned sample_rate)
+FLAC_API FLAC__bool FLAC__format_sample_rate_is_subset(uint32_t sample_rate)
 {
-	if(
-		!FLAC__format_sample_rate_is_valid(sample_rate) ||
-		(
-			sample_rate >= (1u << 16) &&
-			!(sample_rate % 1000 == 0 || sample_rate % 10 == 0)
-		)
+	if( // sample rate is not subset if
+		!FLAC__format_sample_rate_is_valid(sample_rate) || // sample rate is invalid or
+		sample_rate >= ((1u << 16) * 10) || // sample rate is larger than or equal to 655360 or
+		(sample_rate >= (1u << 16) && sample_rate % 10 != 0) //sample rate is >= 65536 and not divisible by 10
 	) {
 		return false;
 	}
@@ -233,7 +241,7 @@
 /* @@@@ add to unit tests; it is already indirectly tested by the metadata_object tests */
 FLAC_API FLAC__bool FLAC__format_seektable_is_legal(const FLAC__StreamMetadata_SeekTable *seek_table)
 {
-	unsigned i;
+	uint32_t i;
 	FLAC__uint64 prev_sample_number = 0;
 	FLAC__bool got_prev = false;
 
@@ -267,13 +275,16 @@
 }
 
 /* @@@@ add to unit tests; it is already indirectly tested by the metadata_object tests */
-FLAC_API unsigned FLAC__format_seektable_sort(FLAC__StreamMetadata_SeekTable *seek_table)
+FLAC_API uint32_t FLAC__format_seektable_sort(FLAC__StreamMetadata_SeekTable *seek_table)
 {
-	unsigned i, j;
+	uint32_t i, j;
 	FLAC__bool first;
 
 	FLAC__ASSERT(0 != seek_table);
 
+	if (seek_table->num_points == 0)
+		return 0;
+
 	/* sort the seekpoints */
 	qsort(seek_table->points, seek_table->num_points, sizeof(FLAC__StreamMetadata_SeekPoint), (int (*)(const void *, const void *))seekpoint_compare_);
 
@@ -305,7 +316,7 @@
  * and a more clear explanation at the end of this section:
  *   http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
  */
-static unsigned utf8len_(const FLAC__byte *utf8)
+static uint32_t utf8len_(const FLAC__byte *utf8)
 {
 	FLAC__ASSERT(0 != utf8);
 	if ((utf8[0] & 0x80) == 0) {
@@ -355,11 +366,11 @@
 	return true;
 }
 
-FLAC_API FLAC__bool FLAC__format_vorbiscomment_entry_value_is_legal(const FLAC__byte *value, unsigned length)
+FLAC_API FLAC__bool FLAC__format_vorbiscomment_entry_value_is_legal(const FLAC__byte *value, uint32_t length)
 {
-	if(length == (unsigned)(-1)) {
+	if(length == (uint32_t)(-1)) {
 		while(*value) {
-			unsigned n = utf8len_(value);
+			uint32_t n = utf8len_(value);
 			if(n == 0)
 				return false;
 			value += n;
@@ -368,7 +379,7 @@
 	else {
 		const FLAC__byte *end = value + length;
 		while(value < end) {
-			unsigned n = utf8len_(value);
+			uint32_t n = utf8len_(value);
 			if(n == 0)
 				return false;
 			value += n;
@@ -379,7 +390,7 @@
 	return true;
 }
 
-FLAC_API FLAC__bool FLAC__format_vorbiscomment_entry_is_legal(const FLAC__byte *entry, unsigned length)
+FLAC_API FLAC__bool FLAC__format_vorbiscomment_entry_is_legal(const FLAC__byte *entry, uint32_t length)
 {
 	const FLAC__byte *s, *end;
 
@@ -393,7 +404,7 @@
 	s++; /* skip '=' */
 
 	while(s < end) {
-		unsigned n = utf8len_(s);
+		uint32_t n = utf8len_(s);
 		if(n == 0)
 			return false;
 		s += n;
@@ -407,7 +418,7 @@
 /* @@@@ add to unit tests; it is already indirectly tested by the metadata_object tests */
 FLAC_API FLAC__bool FLAC__format_cuesheet_is_legal(const FLAC__StreamMetadata_CueSheet *cue_sheet, FLAC__bool check_cd_da_subset, const char **violation)
 {
-	unsigned i, j;
+	uint32_t i, j;
 
 	if(check_cd_da_subset) {
 		if(cue_sheet->lead_in < 2 * 44100) {
@@ -497,7 +508,7 @@
 	}
 
 	for(b = picture->description; *b; ) {
-		unsigned n = utf8len_(b);
+		uint32_t n = utf8len_(b);
 		if(n == 0) {
 			if(violation) *violation = "description string must be valid UTF-8";
 			return false;
@@ -511,7 +522,7 @@
 /*
  * These routines are private to libFLAC
  */
-unsigned FLAC__format_get_max_rice_partition_order(unsigned blocksize, unsigned predictor_order)
+uint32_t FLAC__format_get_max_rice_partition_order(uint32_t blocksize, uint32_t predictor_order)
 {
 	return
 		FLAC__format_get_max_rice_partition_order_from_blocksize_limited_max_and_predictor_order(
@@ -521,9 +532,9 @@
 		);
 }
 
-unsigned FLAC__format_get_max_rice_partition_order_from_blocksize(unsigned blocksize)
+uint32_t FLAC__format_get_max_rice_partition_order_from_blocksize(uint32_t blocksize)
 {
-	unsigned max_rice_partition_order = 0;
+	uint32_t max_rice_partition_order = 0;
 	while(!(blocksize & 1)) {
 		max_rice_partition_order++;
 		blocksize >>= 1;
@@ -531,9 +542,9 @@
 	return flac_min(FLAC__MAX_RICE_PARTITION_ORDER, max_rice_partition_order);
 }
 
-unsigned FLAC__format_get_max_rice_partition_order_from_blocksize_limited_max_and_predictor_order(unsigned limit, unsigned blocksize, unsigned predictor_order)
+uint32_t FLAC__format_get_max_rice_partition_order_from_blocksize_limited_max_and_predictor_order(uint32_t limit, uint32_t blocksize, uint32_t predictor_order)
 {
-	unsigned max_rice_partition_order = limit;
+	uint32_t max_rice_partition_order = limit;
 
 	while(max_rice_partition_order > 0 && (blocksize >> max_rice_partition_order) <= predictor_order)
 		max_rice_partition_order--;
@@ -566,20 +577,27 @@
 	FLAC__format_entropy_coding_method_partitioned_rice_contents_init(object);
 }
 
-FLAC__bool FLAC__format_entropy_coding_method_partitioned_rice_contents_ensure_size(FLAC__EntropyCodingMethod_PartitionedRiceContents *object, unsigned max_partition_order)
+#if defined(_MSC_VER)
+// silence three MSVC warnings 'result of 32-bit shift implicitly converted to 64 bits (was 64-bit shift intended?)'
+#pragma warning ( disable : 4334 )
+#endif
+/* Grows object->parameters and object->raw_bits to (1 << max_partition_order) uint32_t entries when capacity_by_order is smaller than max_partition_order or either pointer is NULL; returns false if either allocation yields NULL, true otherwise. */
+FLAC__bool FLAC__format_entropy_coding_method_partitioned_rice_contents_ensure_size(FLAC__EntropyCodingMethod_PartitionedRiceContents *object, uint32_t max_partition_order)
 {
 	FLAC__ASSERT(0 != object);
 
-	FLAC__ASSERT(object->capacity_by_order > 0 || (0 == object->parameters && 0 == object->raw_bits));
-
-	if(object->capacity_by_order < max_partition_order) {
-		if(0 == (object->parameters = realloc(object->parameters, sizeof(unsigned)*(1 << max_partition_order))))
+	if(object->capacity_by_order < max_partition_order || object->parameters == NULL || object->raw_bits == NULL) { /* (re)allocate when capacity is too small or either array is still NULL */
+		if(0 == (object->parameters = safe_realloc_(object->parameters, sizeof(uint32_t)*(1 << max_partition_order)))) /* NOTE(review): safe_realloc_ is presumably the share/alloc.h helper; confirm how it treats the old pointer on failure */
 			return false;
-		if(0 == (object->raw_bits = realloc(object->raw_bits, sizeof(unsigned)*(1 << max_partition_order))))
+		if(0 == (object->raw_bits = safe_realloc_(object->raw_bits, sizeof(uint32_t)*(1 << max_partition_order)))) /* same byte size as the parameters array */
 			return false;
-		memset(object->raw_bits, 0, sizeof(unsigned)*(1 << max_partition_order));
+		memset(object->raw_bits, 0, sizeof(uint32_t)*(1 << max_partition_order)); /* only raw_bits is zero-filled here; parameters is not cleared */
 		object->capacity_by_order = max_partition_order;
 	}
 
 	return true;
 }
+
+#if defined(_MSC_VER)
+#pragma warning ( default : 4334 )
+#endif

diff --git a/src/libFLAC/ia32/cpu_asm.nasm b/src/libFLAC/ia32/cpu_asm.nasm
deleted file mode 100644
index 036e865..0000000
--- a/src/libFLAC/ia32/cpu_asm.nasm
+++ /dev/null

@@ -1,98 +0,0 @@
-;  vim:filetype=nasm ts=8
-
-;  libFLAC - Free Lossless Audio Codec library
-;  Copyright (C) 2001-2009  Josh Coalson
-;  Copyright (C) 2011-2014  Xiph.Org Foundation
-;
-;  Redistribution and use in source and binary forms, with or without
-;  modification, are permitted provided that the following conditions
-;  are met:
-;
-;  - Redistributions of source code must retain the above copyright
-;  notice, this list of conditions and the following disclaimer.
-;
-;  - Redistributions in binary form must reproduce the above copyright
-;  notice, this list of conditions and the following disclaimer in the
-;  documentation and/or other materials provided with the distribution.
-;
-;  - Neither the name of the Xiph.org Foundation nor the names of its
-;  contributors may be used to endorse or promote products derived from
-;  this software without specific prior written permission.
-;
-;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-;  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-;  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
-;  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-;  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-;  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-;  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-;  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-;  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-;  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-%include "nasm.h"
-
-	data_section
-
-cglobal FLAC__cpu_have_cpuid_asm_ia32
-cglobal FLAC__cpu_info_asm_ia32
-
-	code_section
-
-; **********************************************************************
-;
-; FLAC__uint32 FLAC__cpu_have_cpuid_asm_ia32()
-;
-
-cident FLAC__cpu_have_cpuid_asm_ia32
-	pushfd
-	pop	eax
-	mov	edx, eax
-	xor	eax, 0x00200000
-	push	eax
-	popfd
-	pushfd
-	pop	eax
-	xor	eax, edx
-	and	eax, 0x00200000
-	shr	eax, 0x15
-	push	edx
-	popfd
-	ret
-
-; **********************************************************************
-;
-; void FLAC__cpu_info_asm_ia32(FLAC__uint32 *flags_edx, FLAC__uint32 *flags_ecx)
-;
-
-cident FLAC__cpu_info_asm_ia32
-	;[esp + 8] == flags_edx
-	;[esp + 12] == flags_ecx
-
-	push	ebx
-	call	FLAC__cpu_have_cpuid_asm_ia32
-	test	eax, eax
-	jz	.no_cpuid
-	mov	eax, 0
-	cpuid
-	cmp	eax, 1
-	jb	.no_cpuid
-	mov	eax, 1
-	cpuid
-	mov	ebx, [esp + 8]
-	mov	[ebx], edx
-	mov	ebx, [esp + 12]
-	mov	[ebx], ecx
-	jmp	.end
-.no_cpuid:
-	xor	eax, eax
-	mov	ebx, [esp + 8]
-	mov	[ebx], eax
-	mov	ebx, [esp + 12]
-	mov	[ebx], eax
-.end:
-	pop	ebx
-	ret
-
-; end

diff --git a/src/libFLAC/ia32/fixed_asm.nasm b/src/libFLAC/ia32/fixed_asm.nasm
deleted file mode 100644
index 402c02a..0000000
--- a/src/libFLAC/ia32/fixed_asm.nasm
+++ /dev/null

@@ -1,309 +0,0 @@
-;  vim:filetype=nasm ts=8
-
-;  libFLAC - Free Lossless Audio Codec library
-;  Copyright (C) 2001-2009  Josh Coalson
-;  Copyright (C) 2011-2014  Xiph.Org Foundation
-;
-;  Redistribution and use in source and binary forms, with or without
-;  modification, are permitted provided that the following conditions
-;  are met:
-;
-;  - Redistributions of source code must retain the above copyright
-;  notice, this list of conditions and the following disclaimer.
-;
-;  - Redistributions in binary form must reproduce the above copyright
-;  notice, this list of conditions and the following disclaimer in the
-;  documentation and/or other materials provided with the distribution.
-;
-;  - Neither the name of the Xiph.org Foundation nor the names of its
-;  contributors may be used to endorse or promote products derived from
-;  this software without specific prior written permission.
-;
-;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-;  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-;  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
-;  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-;  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-;  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-;  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-;  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-;  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-;  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-%include "nasm.h"
-
-	data_section
-
-cglobal FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov
-
-	code_section
-
-; **********************************************************************
-;
-; unsigned FLAC__fixed_compute_best_predictor(const FLAC__int32 *data, unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1])
-; {
-; 	FLAC__int32 last_error_0 = data[-1];
-; 	FLAC__int32 last_error_1 = data[-1] - data[-2];
-; 	FLAC__int32 last_error_2 = last_error_1 - (data[-2] - data[-3]);
-; 	FLAC__int32 last_error_3 = last_error_2 - (data[-2] - 2*data[-3] + data[-4]);
-; 	FLAC__int32 error, save;
-; 	FLAC__uint32 total_error_0 = 0, total_error_1 = 0, total_error_2 = 0, total_error_3 = 0, total_error_4 = 0;
-; 	unsigned i, order;
-;
-; 	for(i = 0; i < data_len; i++) {
-; 		error  = data[i]     ; total_error_0 += local_abs(error);                      save = error;
-; 		error -= last_error_0; total_error_1 += local_abs(error); last_error_0 = save; save = error;
-; 		error -= last_error_1; total_error_2 += local_abs(error); last_error_1 = save; save = error;
-; 		error -= last_error_2; total_error_3 += local_abs(error); last_error_2 = save; save = error;
-; 		error -= last_error_3; total_error_4 += local_abs(error); last_error_3 = save;
-; 	}
-;
-; 	if(total_error_0 < min(min(min(total_error_1, total_error_2), total_error_3), total_error_4))
-; 		order = 0;
-; 	else if(total_error_1 < min(min(total_error_2, total_error_3), total_error_4))
-; 		order = 1;
-; 	else if(total_error_2 < min(total_error_3, total_error_4))
-; 		order = 2;
-; 	else if(total_error_3 < total_error_4)
-; 		order = 3;
-; 	else
-; 		order = 4;
-;
-; 	residual_bits_per_sample[0] = (FLAC__float)((data_len > 0 && total_error_0 > 0) ? log(M_LN2 * (FLAC__double)total_error_0 / (FLAC__double)data_len) / M_LN2 : 0.0);
-; 	residual_bits_per_sample[1] = (FLAC__float)((data_len > 0 && total_error_1 > 0) ? log(M_LN2 * (FLAC__double)total_error_1 / (FLAC__double)data_len) / M_LN2 : 0.0);
-; 	residual_bits_per_sample[2] = (FLAC__float)((data_len > 0 && total_error_2 > 0) ? log(M_LN2 * (FLAC__double)total_error_2 / (FLAC__double)data_len) / M_LN2 : 0.0);
-; 	residual_bits_per_sample[3] = (FLAC__float)((data_len > 0 && total_error_3 > 0) ? log(M_LN2 * (FLAC__double)total_error_3 / (FLAC__double)data_len) / M_LN2 : 0.0);
-; 	residual_bits_per_sample[4] = (FLAC__float)((data_len > 0 && total_error_4 > 0) ? log(M_LN2 * (FLAC__double)total_error_4 / (FLAC__double)data_len) / M_LN2 : 0.0);
-;
-; 	return order;
-; }
-	ALIGN 16
-cident FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov
-
-	; esp + 36 == data[]   (offsets as seen after the four pushes and 'sub esp, 16' below)
-	; esp + 40 == data_len
-	; esp + 44 == residual_bits_per_sample[]
-
-	push	ebp				; save the registers this routine uses
-	push	ebx
-	push	esi
-	push	edi
-	sub	esp, byte 16			; reserve local scratch space
-	; qword [esp] == temp space for loading FLAC__uint64s to FPU regs
-
-	; ebx == &data[i]
-	; ecx == loop counter (i)
-	; ebp == order
-	; mm0 == total_error_1:total_error_0
-	; mm1 == total_error_2:total_error_3
-	; mm2 == :total_error_4
-	; mm3 == last_error_1:last_error_0
-	; mm4 == last_error_2:last_error_3
-
-	mov	ecx, [esp + 40]			; ecx = data_len
-	test	ecx, ecx
-	jz	near .data_len_is_0
-
-	mov	ebx, [esp + 36]			; ebx = data[]
-	movd	mm3, [ebx - 4]			; mm3 = 0:last_error_0
-	movd	mm2, [ebx - 8]			; mm2 = 0:data[-2]
-	movd	mm1, [ebx - 12]			; mm1 = 0:data[-3]
-	movd	mm0, [ebx - 16]			; mm0 = 0:data[-4]
-	movq	mm5, mm3			; mm5 = 0:last_error_0
-	psubd	mm5, mm2			; mm5 = 0:last_error_1
-	punpckldq	mm3, mm5		; mm3 = last_error_1:last_error_0
-	psubd	mm2, mm1			; mm2 = 0:data[-2] - data[-3]
-	psubd	mm5, mm2			; mm5 = 0:last_error_2
-	movq	mm4, mm5			; mm4 = 0:last_error_2
-	psubd	mm4, mm2			; mm4 = 0:last_error_2 - (data[-2] - data[-3])
-	paddd	mm4, mm1			; mm4 = 0:last_error_2 - (data[-2] - 2 * data[-3])
-	psubd	mm4, mm0			; mm4 = 0:last_error_3
-	punpckldq	mm4, mm5		; mm4 = last_error_2:last_error_3
-	pxor	mm0, mm0			; mm0 = total_error_1:total_error_0
-	pxor	mm1, mm1			; mm1 = total_error_2:total_error_3
-	pxor	mm2, mm2			; mm2 = 0:total_error_4
-
-	ALIGN 16
-.loop:
-	movd	mm7, [ebx]			; mm7 = 0:error_0
-	add	ebx, byte 4			; advance ebx to the next sample
-	movq	mm6, mm7			; mm6 = 0:error_0
-	psubd	mm7, mm3			; mm7 = :error_1
-	punpckldq	mm6, mm7		; mm6 = error_1:error_0
-	movq	mm5, mm6			; mm5 = error_1:error_0
-	movq	mm7, mm6			; mm7 = error_1:error_0
-	psubd	mm5, mm3			; mm5 = error_2:
-	movq	mm3, mm6			; mm3 = error_1:error_0 (these become the new last_error_1:last_error_0)
-	psrad	mm6, 31				; mm6 = per-dword sign mask (all ones where negative)
-	pxor	mm7, mm6			; (x ^ mask) - mask == abs(x)
-	psubd	mm7, mm6			; mm7 = abs(error_1):abs(error_0)
-	paddd	mm0, mm7			; mm0 = total_error_1:total_error_0
-	movq	mm6, mm5			; mm6 = error_2:
-	psubd	mm5, mm4			; mm5 = error_3:
-	punpckhdq	mm5, mm6		; mm5 = error_2:error_3
-	movq	mm7, mm5			; mm7 = error_2:error_3
-	movq	mm6, mm5			; mm6 = error_2:error_3
-	psubd	mm5, mm4			; mm5 = :error_4
-	movq	mm4, mm6			; mm4 = error_2:error_3 (these become the new last_error_2:last_error_3)
-	psrad	mm6, 31				; mm6 = per-dword sign mask
-	pxor	mm7, mm6
-	psubd	mm7, mm6			; mm7 = abs(error_2):abs(error_3)
-	paddd	mm1, mm7			; mm1 = total_error_2:total_error_3
-	movq	mm6, mm5			; mm6 = :error_4
-	psrad	mm5, 31				; mm5 = per-dword sign mask
-	pxor	mm6, mm5
-	psubd	mm6, mm5			; mm6 = :abs(error_4)
-	paddd	mm2, mm6			; mm2 = :total_error_4
-	
-	dec	ecx				; one sample done
-	jnz	short .loop			; repeat until data_len samples are processed
-
-; 	if(total_error_0 < min(min(min(total_error_1, total_error_2), total_error_3), total_error_4))
-; 		order = 0;
-; 	else if(total_error_1 < min(min(total_error_2, total_error_3), total_error_4))
-; 		order = 1;
-; 	else if(total_error_2 < min(total_error_3, total_error_4))
-; 		order = 2;
-; 	else if(total_error_3 < total_error_4)
-; 		order = 3;
-; 	else
-; 		order = 4;
-	movq	mm3, mm0			; mm3 = total_error_1:total_error_0
-	movd	edi, mm2			; edi = total_error_4
-	movd	esi, mm1			; esi = total_error_3
-	movd	eax, mm0			; eax = total_error_0
-	punpckhdq	mm1, mm1		; mm1 = total_error_2:total_error_2
-	punpckhdq	mm3, mm3		; mm3 = total_error_1:total_error_1
-	movd	edx, mm1			; edx = total_error_2
-	movd	ecx, mm3			; ecx = total_error_1
-
-	xor	ebx, ebx			; ebx = candidate order, starts at 0
-	xor	ebp, ebp			; ebp = order = 0
-	inc	ebx				; candidate order = 1
-	cmp	ecx, eax			; total_error_1 vs. running minimum (unsigned)
-	cmovb	eax, ecx			; eax = min(total_error_0, total_error_1)
-	cmovbe	ebp, ebx			; same cmp flags (cmov leaves flags alone): order = 1 if total_error_1 <= old minimum
-	inc	ebx				; candidate order = 2
-	cmp	edx, eax
-	cmovb	eax, edx			; eax = min(total_error_0, total_error_1, total_error_2)
-	cmovbe	ebp, ebx			; order = 2 if total_error_2 <= old minimum
-	inc	ebx				; candidate order = 3
-	cmp	esi, eax
-	cmovb	eax, esi			; eax = min(total_error_0, total_error_1, total_error_2, total_error_3)
-	cmovbe	ebp, ebx			; order = 3 if total_error_3 <= old minimum
-	inc	ebx				; candidate order = 4
-	cmp	edi, eax
-	cmovb	eax, edi			; eax = min(total_error_0, total_error_1, total_error_2, total_error_3, total_error_4)
-	cmovbe	ebp, ebx			; order = 4 if total_error_4 <= old minimum
-	movd	ebx, mm0			; ebx = total_error_0
-	emms					; done with MMX; the x87 FPU is used from here on
-
-	; 	residual_bits_per_sample[0] = (FLAC__float)((data_len > 0 && total_error_0 > 0) ? log(M_LN2 * (FLAC__double)total_error_0 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	; 	residual_bits_per_sample[1] = (FLAC__float)((data_len > 0 && total_error_1 > 0) ? log(M_LN2 * (FLAC__double)total_error_1 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	; 	residual_bits_per_sample[2] = (FLAC__float)((data_len > 0 && total_error_2 > 0) ? log(M_LN2 * (FLAC__double)total_error_2 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	; 	residual_bits_per_sample[3] = (FLAC__float)((data_len > 0 && total_error_3 > 0) ? log(M_LN2 * (FLAC__double)total_error_3 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	; 	residual_bits_per_sample[4] = (FLAC__float)((data_len > 0 && total_error_4 > 0) ? log(M_LN2 * (FLAC__double)total_error_4 / (FLAC__double)data_len) / M_LN2 : 0.0);
-	xor	eax, eax			; eax = 0: used as the high dword of each 64-bit temp and as the stored value for 0.0
-	fild	dword [esp + 40]		; ST = data_len (NOTE: assumes data_len is <2gigs)
-.rbps_0:
-	test	ebx, ebx
-	jz	.total_error_0_is_0
-	fld1					; ST = 1.0 data_len
-	mov	[esp], ebx
-	mov	[esp + 4], eax			; [esp] = (FLAC__uint64)total_error_0
-	mov	ebx, [esp + 44]			; ebx = residual_bits_per_sample[] (total_error_0 is no longer needed in ebx)
-	fild	qword [esp]			; ST = total_error_0 1.0 data_len
-	fdiv	st2				; ST = total_error_0/data_len 1.0 data_len
-	fldln2					; ST = ln2 total_error_0/data_len 1.0 data_len
-	fmulp	st1				; ST = ln2*total_error_0/data_len 1.0 data_len
-	fyl2x					; ST = log2(ln2*total_error_0/data_len) data_len
-	fstp	dword [ebx]			; residual_bits_per_sample[0] = log2(ln2*total_error_0/data_len)   ST = data_len
-	jmp	short .rbps_1
-.total_error_0_is_0:
-	mov	ebx, [esp + 44]			; ebx = residual_bits_per_sample[]
-	mov	[ebx], eax			; residual_bits_per_sample[0] = 0.0
-.rbps_1:
-	test	ecx, ecx
-	jz	.total_error_1_is_0
-	fld1					; ST = 1.0 data_len
-	mov	[esp], ecx
-	mov	[esp + 4], eax			; [esp] = (FLAC__uint64)total_error_1
-	fild	qword [esp]			; ST = total_error_1 1.0 data_len
-	fdiv	st2				; ST = total_error_1/data_len 1.0 data_len
-	fldln2					; ST = ln2 total_error_1/data_len 1.0 data_len
-	fmulp	st1				; ST = ln2*total_error_1/data_len 1.0 data_len
-	fyl2x					; ST = log2(ln2*total_error_1/data_len) data_len
-	fstp	dword [ebx + 4]			; residual_bits_per_sample[1] = log2(ln2*total_error_1/data_len)   ST = data_len
-	jmp	short .rbps_2
-.total_error_1_is_0:
-	mov	[ebx + 4], eax			; residual_bits_per_sample[1] = 0.0
-.rbps_2:
-	test	edx, edx
-	jz	.total_error_2_is_0
-	fld1					; ST = 1.0 data_len
-	mov	[esp], edx
-	mov	[esp + 4], eax			; [esp] = (FLAC__uint64)total_error_2
-	fild	qword [esp]			; ST = total_error_2 1.0 data_len
-	fdiv	st2				; ST = total_error_2/data_len 1.0 data_len
-	fldln2					; ST = ln2 total_error_2/data_len 1.0 data_len
-	fmulp	st1				; ST = ln2*total_error_2/data_len 1.0 data_len
-	fyl2x					; ST = log2(ln2*total_error_2/data_len) data_len
-	fstp	dword [ebx + 8]			; residual_bits_per_sample[2] = log2(ln2*total_error_2/data_len)   ST = data_len
-	jmp	short .rbps_3
-.total_error_2_is_0:
-	mov	[ebx + 8], eax			; residual_bits_per_sample[2] = 0.0
-.rbps_3:
-	test	esi, esi
-	jz	.total_error_3_is_0
-	fld1					; ST = 1.0 data_len
-	mov	[esp], esi
-	mov	[esp + 4], eax			; [esp] = (FLAC__uint64)total_error_3
-	fild	qword [esp]			; ST = total_error_3 1.0 data_len
-	fdiv	st2				; ST = total_error_3/data_len 1.0 data_len
-	fldln2					; ST = ln2 total_error_3/data_len 1.0 data_len
-	fmulp	st1				; ST = ln2*total_error_3/data_len 1.0 data_len
-	fyl2x					; ST = log2(ln2*total_error_3/data_len) data_len
-	fstp	dword [ebx + 12]		; residual_bits_per_sample[3] = log2(ln2*total_error_3/data_len)   ST = data_len
-	jmp	short .rbps_4
-.total_error_3_is_0:
-	mov	[ebx + 12], eax			; residual_bits_per_sample[3] = 0.0
-.rbps_4:
-	test	edi, edi
-	jz	.total_error_4_is_0
-	fld1					; ST = 1.0 data_len
-	mov	[esp], edi
-	mov	[esp + 4], eax			; [esp] = (FLAC__uint64)total_error_4
-	fild	qword [esp]			; ST = total_error_4 1.0 data_len
-	fdiv	st2				; ST = total_error_4/data_len 1.0 data_len
-	fldln2					; ST = ln2 total_error_4/data_len 1.0 data_len
-	fmulp	st1				; ST = ln2*total_error_4/data_len 1.0 data_len
-	fyl2x					; ST = log2(ln2*total_error_4/data_len) data_len
-	fstp	dword [ebx + 16]		; residual_bits_per_sample[4] = log2(ln2*total_error_4/data_len)   ST = data_len
-	jmp	short .rbps_end
-.total_error_4_is_0:
-	mov	[ebx + 16], eax			; residual_bits_per_sample[4] = 0.0
-.rbps_end:
-	fstp	st0				; ST = [empty]
-	jmp	short .end
-.data_len_is_0:
-	; data_len == 0, so residual_bits_per_sample[*] = 0.0
-	xor	ebp, ebp			; ebp = 0, stored below as the 0.0 value
-	mov	edi, [esp + 44]			; edi = residual_bits_per_sample[]
-	mov	[edi], ebp
-	mov	[edi + 4], ebp
-	mov	[edi + 8], ebp
-	mov	[edi + 12], ebp
-	mov	[edi + 16], ebp
-	add	ebp, byte 4			; order = 4
-
-.end:
-	mov	eax, ebp			; return order
-	add	esp, byte 16			; release the local scratch space
-	pop	edi				; restore saved registers in reverse order
-	pop	esi
-	pop	ebx
-	pop	ebp
-	ret
-
-; end

diff --git a/src/libFLAC/ia32/lpc_asm.nasm b/src/libFLAC/ia32/lpc_asm.nasm
deleted file mode 100644
index bf65032..0000000
--- a/src/libFLAC/ia32/lpc_asm.nasm
+++ /dev/null

@@ -1,2049 +0,0 @@
-;  vim:filetype=nasm ts=8
-
-;  libFLAC - Free Lossless Audio Codec library
-;  Copyright (C) 2001-2009  Josh Coalson
-;  Copyright (C) 2011-2014  Xiph.Org Foundation
-;
-;  Redistribution and use in source and binary forms, with or without
-;  modification, are permitted provided that the following conditions
-;  are met:
-;
-;  - Redistributions of source code must retain the above copyright
-;  notice, this list of conditions and the following disclaimer.
-;
-;  - Redistributions in binary form must reproduce the above copyright
-;  notice, this list of conditions and the following disclaimer in the
-;  documentation and/or other materials provided with the distribution.
-;
-;  - Neither the name of the Xiph.org Foundation nor the names of its
-;  contributors may be used to endorse or promote products derived from
-;  this software without specific prior written permission.
-;
-;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-;  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-;  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
-;  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-;  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-;  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-;  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-;  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-;  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-;  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-%include "nasm.h"
-
-	data_section
-
-cglobal FLAC__lpc_compute_autocorrelation_asm_ia32
-cglobal FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_4
-cglobal FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_8
-cglobal FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_12
-cglobal FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_16
-cglobal FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32
-cglobal FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx
-cglobal FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32
-cglobal FLAC__lpc_restore_signal_asm_ia32
-cglobal FLAC__lpc_restore_signal_asm_ia32_mmx
-cglobal FLAC__lpc_restore_signal_wide_asm_ia32
-
-	code_section
-
-; **********************************************************************
-;
-; void FLAC__lpc_compute_autocorrelation_asm(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
-; {
-;	FLAC__real d;
-;	unsigned sample, coeff;
-;	const unsigned limit = data_len - lag;
-;
-;	FLAC__ASSERT(lag > 0);
-;	FLAC__ASSERT(lag <= data_len);
-;
-;	for(coeff = 0; coeff < lag; coeff++)
-;		autoc[coeff] = 0.0;
-;	for(sample = 0; sample <= limit; sample++) {
-;		d = data[sample];
-;		for(coeff = 0; coeff < lag; coeff++)
-;			autoc[coeff] += d * data[sample+coeff];
-;	}
-;	for(; sample < data_len; sample++) {
-;		d = data[sample];
-;		for(coeff = 0; coeff < data_len - sample; coeff++)
-;			autoc[coeff] += d * data[sample+coeff];
-;	}
-; }
-;
-	ALIGN 16
-cident FLAC__lpc_compute_autocorrelation_asm_ia32
-	;[esp + 28] == autoc[]   (offsets as seen after the three pushes below)
-	;[esp + 24] == lag
-	;[esp + 20] == data_len
-	;[esp + 16] == data[]
-
-	;ASSERT(lag > 0)
-	;ASSERT(lag <= 33)
-	;ASSERT(lag <= data_len)
-
-.begin:
-	push	esi				; save the registers this routine uses
-	push	edi
-	push	ebx
-
-	;	for(coeff = 0; coeff < lag; coeff++)
-	;		autoc[coeff] = 0.0;
-	mov	edi, [esp + 28]			; edi == autoc
-	mov	ecx, [esp + 24]			; ecx = # of dwords (=lag) of 0 to write
-	xor	eax, eax
-	rep	stosd				; write ecx zero dwords starting at autoc
-
-	;	const unsigned limit = data_len - lag;
-	mov	eax, [esp + 24]			; eax == lag
-	mov	ecx, [esp + 20]
-	sub	ecx, eax			; ecx == limit
-
-	mov	edi, [esp + 28]			; edi == autoc
-	mov	esi, [esp + 16]			; esi == data
-	inc	ecx				; we are looping <= limit so we add one to the counter
-
-	;	for(sample = 0; sample <= limit; sample++) {
-	;		d = data[sample];
-	;		for(coeff = 0; coeff < lag; coeff++)
-	;			autoc[coeff] += d * data[sample+coeff];
-	;	}
-	fld	dword [esi]			; ST = d <- data[sample]
-	; each iteration is 11 bytes so we need (-eax)*11, so we do (-12*eax + eax)
-	lea	edx, [eax + eax*2]		; edx = 3*lag
-	neg	edx				; edx = -3*lag
-	lea	edx, [eax + edx*4 + .jumper1_0 - .get_eip1]	; edx = -11*lag + (offset of .jumper1_0 from .get_eip1)
-	call	.mov_eip_to_ebx			; ebx = run-time address of .get_eip1 (the call's return address)
-.get_eip1:
-	add	edx, ebx			; edx = run-time address to enter the unrolled code below
-	inc	edx				; compensate for the shorter opcode on the last iteration
-	inc	edx				; compensate for the shorter opcode on the last iteration
-	inc	edx				; compensate for the shorter opcode on the last iteration
-	cmp	eax, 33
-	jne	.loop1_start
-	sub	edx, byte 9			; compensate for the longer opcodes on the first iteration
-.loop1_start:
-	jmp	edx				; enter the unrolled code so that exactly lag terms are executed
-
-.mov_eip_to_ebx:
-	mov	ebx, [esp]			; ebx = return address of the caller's 'call'
-	ret
-
-	fld	st0				; ST = d d
-	fmul	dword [esi + (32*4)]		; ST = d*data[sample+32] d		WATCHOUT: not a byte displacement here!
-	fadd	dword [edi + (32*4)]		; ST = autoc[32]+d*data[sample+32] d	WATCHOUT: not a byte displacement here!
-	fstp	dword [edi + (32*4)]		; autoc[32]+=d*data[sample+32]  ST = d	WATCHOUT: not a byte displacement here!
-	fld	st0				; ST = d d
-	fmul	dword [esi + (31*4)]		; ST = d*data[sample+31] d
-	fadd	dword [edi + (31*4)]		; ST = autoc[31]+d*data[sample+31] d
-	fstp	dword [edi + (31*4)]		; autoc[31]+=d*data[sample+31]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (30*4)]		; ST = d*data[sample+30] d
-	fadd	dword [edi + (30*4)]		; ST = autoc[30]+d*data[sample+30] d
-	fstp	dword [edi + (30*4)]		; autoc[30]+=d*data[sample+30]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (29*4)]		; ST = d*data[sample+29] d
-	fadd	dword [edi + (29*4)]		; ST = autoc[29]+d*data[sample+29] d
-	fstp	dword [edi + (29*4)]		; autoc[29]+=d*data[sample+29]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (28*4)]		; ST = d*data[sample+28] d
-	fadd	dword [edi + (28*4)]		; ST = autoc[28]+d*data[sample+28] d
-	fstp	dword [edi + (28*4)]		; autoc[28]+=d*data[sample+28]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (27*4)]		; ST = d*data[sample+27] d
-	fadd	dword [edi + (27*4)]		; ST = autoc[27]+d*data[sample+27] d
-	fstp	dword [edi + (27*4)]		; autoc[27]+=d*data[sample+27]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (26*4)]		; ST = d*data[sample+26] d
-	fadd	dword [edi + (26*4)]		; ST = autoc[26]+d*data[sample+26] d
-	fstp	dword [edi + (26*4)]		; autoc[26]+=d*data[sample+26]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (25*4)]		; ST = d*data[sample+25] d
-	fadd	dword [edi + (25*4)]		; ST = autoc[25]+d*data[sample+25] d
-	fstp	dword [edi + (25*4)]		; autoc[25]+=d*data[sample+25]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (24*4)]		; ST = d*data[sample+24] d
-	fadd	dword [edi + (24*4)]		; ST = autoc[24]+d*data[sample+24] d
-	fstp	dword [edi + (24*4)]		; autoc[24]+=d*data[sample+24]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (23*4)]		; ST = d*data[sample+23] d
-	fadd	dword [edi + (23*4)]		; ST = autoc[23]+d*data[sample+23] d
-	fstp	dword [edi + (23*4)]		; autoc[23]+=d*data[sample+23]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (22*4)]		; ST = d*data[sample+22] d
-	fadd	dword [edi + (22*4)]		; ST = autoc[22]+d*data[sample+22] d
-	fstp	dword [edi + (22*4)]		; autoc[22]+=d*data[sample+22]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (21*4)]		; ST = d*data[sample+21] d
-	fadd	dword [edi + (21*4)]		; ST = autoc[21]+d*data[sample+21] d
-	fstp	dword [edi + (21*4)]		; autoc[21]+=d*data[sample+21]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (20*4)]		; ST = d*data[sample+20] d
-	fadd	dword [edi + (20*4)]		; ST = autoc[20]+d*data[sample+20] d
-	fstp	dword [edi + (20*4)]		; autoc[20]+=d*data[sample+20]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (19*4)]		; ST = d*data[sample+19] d
-	fadd	dword [edi + (19*4)]		; ST = autoc[19]+d*data[sample+19] d
-	fstp	dword [edi + (19*4)]		; autoc[19]+=d*data[sample+19]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (18*4)]		; ST = d*data[sample+18] d
-	fadd	dword [edi + (18*4)]		; ST = autoc[18]+d*data[sample+18] d
-	fstp	dword [edi + (18*4)]		; autoc[18]+=d*data[sample+18]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (17*4)]		; ST = d*data[sample+17] d
-	fadd	dword [edi + (17*4)]		; ST = autoc[17]+d*data[sample+17] d
-	fstp	dword [edi + (17*4)]		; autoc[17]+=d*data[sample+17]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (16*4)]		; ST = d*data[sample+16] d
-	fadd	dword [edi + (16*4)]		; ST = autoc[16]+d*data[sample+16] d
-	fstp	dword [edi + (16*4)]		; autoc[16]+=d*data[sample+16]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (15*4)]		; ST = d*data[sample+15] d
-	fadd	dword [edi + (15*4)]		; ST = autoc[15]+d*data[sample+15] d
-	fstp	dword [edi + (15*4)]		; autoc[15]+=d*data[sample+15]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (14*4)]		; ST = d*data[sample+14] d
-	fadd	dword [edi + (14*4)]		; ST = autoc[14]+d*data[sample+14] d
-	fstp	dword [edi + (14*4)]		; autoc[14]+=d*data[sample+14]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (13*4)]		; ST = d*data[sample+13] d
-	fadd	dword [edi + (13*4)]		; ST = autoc[13]+d*data[sample+13] d
-	fstp	dword [edi + (13*4)]		; autoc[13]+=d*data[sample+13]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (12*4)]		; ST = d*data[sample+12] d
-	fadd	dword [edi + (12*4)]		; ST = autoc[12]+d*data[sample+12] d
-	fstp	dword [edi + (12*4)]		; autoc[12]+=d*data[sample+12]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (11*4)]		; ST = d*data[sample+11] d
-	fadd	dword [edi + (11*4)]		; ST = autoc[11]+d*data[sample+11] d
-	fstp	dword [edi + (11*4)]		; autoc[11]+=d*data[sample+11]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (10*4)]		; ST = d*data[sample+10] d
-	fadd	dword [edi + (10*4)]		; ST = autoc[10]+d*data[sample+10] d
-	fstp	dword [edi + (10*4)]		; autoc[10]+=d*data[sample+10]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 9*4)]		; ST = d*data[sample+9] d
-	fadd	dword [edi + ( 9*4)]		; ST = autoc[9]+d*data[sample+9] d
-	fstp	dword [edi + ( 9*4)]		; autoc[9]+=d*data[sample+9]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 8*4)]		; ST = d*data[sample+8] d
-	fadd	dword [edi + ( 8*4)]		; ST = autoc[8]+d*data[sample+8] d
-	fstp	dword [edi + ( 8*4)]		; autoc[8]+=d*data[sample+8]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 7*4)]		; ST = d*data[sample+7] d
-	fadd	dword [edi + ( 7*4)]		; ST = autoc[7]+d*data[sample+7] d
-	fstp	dword [edi + ( 7*4)]		; autoc[7]+=d*data[sample+7]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 6*4)]		; ST = d*data[sample+6] d
-	fadd	dword [edi + ( 6*4)]		; ST = autoc[6]+d*data[sample+6] d
-	fstp	dword [edi + ( 6*4)]		; autoc[6]+=d*data[sample+6]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 5*4)]		; ST = d*data[sample+5] d
-	fadd	dword [edi + ( 5*4)]		; ST = autoc[5]+d*data[sample+5] d
-	fstp	dword [edi + ( 5*4)]		; autoc[5]+=d*data[sample+5]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 4*4)]		; ST = d*data[sample+4] d
-	fadd	dword [edi + ( 4*4)]		; ST = autoc[4]+d*data[sample+4] d
-	fstp	dword [edi + ( 4*4)]		; autoc[4]+=d*data[sample+4]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 3*4)]		; ST = d*data[sample+3] d
-	fadd	dword [edi + ( 3*4)]		; ST = autoc[3]+d*data[sample+3] d
-	fstp	dword [edi + ( 3*4)]		; autoc[3]+=d*data[sample+3]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 2*4)]		; ST = d*data[sample+2] d
-	fadd	dword [edi + ( 2*4)]		; ST = autoc[2]+d*data[sample+2] d
-	fstp	dword [edi + ( 2*4)]		; autoc[2]+=d*data[sample+2]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 1*4)]		; ST = d*data[sample+1] d
-	fadd	dword [edi + ( 1*4)]		; ST = autoc[1]+d*data[sample+1] d
-	fstp	dword [edi + ( 1*4)]		; autoc[1]+=d*data[sample+1]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi]			; ST = d*data[sample] d			WATCHOUT: no displacement byte here!
-	fadd	dword [edi]			; ST = autoc[0]+d*data[sample] d	WATCHOUT: no displacement byte here!
-	fstp	dword [edi]			; autoc[0]+=d*data[sample]  ST = d	WATCHOUT: no displacement byte here!
-.jumper1_0:
-
-	fstp	st0				; pop d, ST = empty
-	add	esi, byte 4			; sample++
-	dec	ecx
-	jz	.loop1_end
-	fld	dword [esi]			; ST = d <- data[sample]
-	jmp	edx				; same entry point again: lag terms per sample
-.loop1_end:
-
-	;	for(; sample < data_len; sample++) {
-	;		d = data[sample];
-	;		for(coeff = 0; coeff < data_len - sample; coeff++)
-	;			autoc[coeff] += d * data[sample+coeff];
-	;	}
-	mov	ecx, [esp + 24]			; ecx <- lag
-	dec	ecx				; ecx <- lag - 1
-	jz	near .end			; skip loop if 0 (i.e. lag == 1)
-
-	fld	dword [esi]			; ST = d <- data[sample]
-	mov	eax, ecx			; eax <- lag - 1 == data_len - sample the first time through
-	; each iteration is 11 bytes so we need (-eax)*11, so we do (-12*eax + eax)
-	lea	edx, [eax + eax*2]		; edx = 3*eax
-	neg	edx				; edx = -3*eax
-	lea	edx, [eax + edx*4 + .jumper2_0 - .get_eip2]	; edx = -11*eax + (offset of .jumper2_0 from .get_eip2)
-	call	.mov_eip_to_ebx			; ebx = run-time address of .get_eip2
-.get_eip2:
-	add	edx, ebx			; edx = run-time address to enter the unrolled code below
-	inc	edx				; compensate for the shorter opcode on the last iteration
-	inc	edx				; compensate for the shorter opcode on the last iteration
-	inc	edx				; compensate for the shorter opcode on the last iteration
-	jmp	edx
-
-	fld	st0				; ST = d d
-	fmul	dword [esi + (31*4)]		; ST = d*data[sample+31] d
-	fadd	dword [edi + (31*4)]		; ST = autoc[31]+d*data[sample+31] d
-	fstp	dword [edi + (31*4)]		; autoc[31]+=d*data[sample+31]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (30*4)]		; ST = d*data[sample+30] d
-	fadd	dword [edi + (30*4)]		; ST = autoc[30]+d*data[sample+30] d
-	fstp	dword [edi + (30*4)]		; autoc[30]+=d*data[sample+30]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (29*4)]		; ST = d*data[sample+29] d
-	fadd	dword [edi + (29*4)]		; ST = autoc[29]+d*data[sample+29] d
-	fstp	dword [edi + (29*4)]		; autoc[29]+=d*data[sample+29]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (28*4)]		; ST = d*data[sample+28] d
-	fadd	dword [edi + (28*4)]		; ST = autoc[28]+d*data[sample+28] d
-	fstp	dword [edi + (28*4)]		; autoc[28]+=d*data[sample+28]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (27*4)]		; ST = d*data[sample+27] d
-	fadd	dword [edi + (27*4)]		; ST = autoc[27]+d*data[sample+27] d
-	fstp	dword [edi + (27*4)]		; autoc[27]+=d*data[sample+27]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (26*4)]		; ST = d*data[sample+26] d
-	fadd	dword [edi + (26*4)]		; ST = autoc[26]+d*data[sample+26] d
-	fstp	dword [edi + (26*4)]		; autoc[26]+=d*data[sample+26]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (25*4)]		; ST = d*data[sample+25] d
-	fadd	dword [edi + (25*4)]		; ST = autoc[25]+d*data[sample+25] d
-	fstp	dword [edi + (25*4)]		; autoc[25]+=d*data[sample+25]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (24*4)]		; ST = d*data[sample+24] d
-	fadd	dword [edi + (24*4)]		; ST = autoc[24]+d*data[sample+24] d
-	fstp	dword [edi + (24*4)]		; autoc[24]+=d*data[sample+24]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (23*4)]		; ST = d*data[sample+23] d
-	fadd	dword [edi + (23*4)]		; ST = autoc[23]+d*data[sample+23] d
-	fstp	dword [edi + (23*4)]		; autoc[23]+=d*data[sample+23]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (22*4)]		; ST = d*data[sample+22] d
-	fadd	dword [edi + (22*4)]		; ST = autoc[22]+d*data[sample+22] d
-	fstp	dword [edi + (22*4)]		; autoc[22]+=d*data[sample+22]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (21*4)]		; ST = d*data[sample+21] d
-	fadd	dword [edi + (21*4)]		; ST = autoc[21]+d*data[sample+21] d
-	fstp	dword [edi + (21*4)]		; autoc[21]+=d*data[sample+21]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (20*4)]		; ST = d*data[sample+20] d
-	fadd	dword [edi + (20*4)]		; ST = autoc[20]+d*data[sample+20] d
-	fstp	dword [edi + (20*4)]		; autoc[20]+=d*data[sample+20]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (19*4)]		; ST = d*data[sample+19] d
-	fadd	dword [edi + (19*4)]		; ST = autoc[19]+d*data[sample+19] d
-	fstp	dword [edi + (19*4)]		; autoc[19]+=d*data[sample+19]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (18*4)]		; ST = d*data[sample+18] d
-	fadd	dword [edi + (18*4)]		; ST = autoc[18]+d*data[sample+18] d
-	fstp	dword [edi + (18*4)]		; autoc[18]+=d*data[sample+18]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (17*4)]		; ST = d*data[sample+17] d
-	fadd	dword [edi + (17*4)]		; ST = autoc[17]+d*data[sample+17] d
-	fstp	dword [edi + (17*4)]		; autoc[17]+=d*data[sample+17]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (16*4)]		; ST = d*data[sample+16] d
-	fadd	dword [edi + (16*4)]		; ST = autoc[16]+d*data[sample+16] d
-	fstp	dword [edi + (16*4)]		; autoc[16]+=d*data[sample+16]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (15*4)]		; ST = d*data[sample+15] d
-	fadd	dword [edi + (15*4)]		; ST = autoc[15]+d*data[sample+15] d
-	fstp	dword [edi + (15*4)]		; autoc[15]+=d*data[sample+15]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (14*4)]		; ST = d*data[sample+14] d
-	fadd	dword [edi + (14*4)]		; ST = autoc[14]+d*data[sample+14] d
-	fstp	dword [edi + (14*4)]		; autoc[14]+=d*data[sample+14]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (13*4)]		; ST = d*data[sample+13] d
-	fadd	dword [edi + (13*4)]		; ST = autoc[13]+d*data[sample+13] d
-	fstp	dword [edi + (13*4)]		; autoc[13]+=d*data[sample+13]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (12*4)]		; ST = d*data[sample+12] d
-	fadd	dword [edi + (12*4)]		; ST = autoc[12]+d*data[sample+12] d
-	fstp	dword [edi + (12*4)]		; autoc[12]+=d*data[sample+12]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (11*4)]		; ST = d*data[sample+11] d
-	fadd	dword [edi + (11*4)]		; ST = autoc[11]+d*data[sample+11] d
-	fstp	dword [edi + (11*4)]		; autoc[11]+=d*data[sample+11]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + (10*4)]		; ST = d*data[sample+10] d
-	fadd	dword [edi + (10*4)]		; ST = autoc[10]+d*data[sample+10] d
-	fstp	dword [edi + (10*4)]		; autoc[10]+=d*data[sample+10]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 9*4)]		; ST = d*data[sample+9] d
-	fadd	dword [edi + ( 9*4)]		; ST = autoc[9]+d*data[sample+9] d
-	fstp	dword [edi + ( 9*4)]		; autoc[9]+=d*data[sample+9]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 8*4)]		; ST = d*data[sample+8] d
-	fadd	dword [edi + ( 8*4)]		; ST = autoc[8]+d*data[sample+8] d
-	fstp	dword [edi + ( 8*4)]		; autoc[8]+=d*data[sample+8]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 7*4)]		; ST = d*data[sample+7] d
-	fadd	dword [edi + ( 7*4)]		; ST = autoc[7]+d*data[sample+7] d
-	fstp	dword [edi + ( 7*4)]		; autoc[7]+=d*data[sample+7]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 6*4)]		; ST = d*data[sample+6] d
-	fadd	dword [edi + ( 6*4)]		; ST = autoc[6]+d*data[sample+6] d
-	fstp	dword [edi + ( 6*4)]		; autoc[6]+=d*data[sample+6]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 5*4)]		; ST = d*data[sample+5] d
-	fadd	dword [edi + ( 5*4)]		; ST = autoc[5]+d*data[sample+5] d
-	fstp	dword [edi + ( 5*4)]		; autoc[5]+=d*data[sample+5]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 4*4)]		; ST = d*data[sample+4] d
-	fadd	dword [edi + ( 4*4)]		; ST = autoc[4]+d*data[sample+4] d
-	fstp	dword [edi + ( 4*4)]		; autoc[4]+=d*data[sample+4]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 3*4)]		; ST = d*data[sample+3] d
-	fadd	dword [edi + ( 3*4)]		; ST = autoc[3]+d*data[sample+3] d
-	fstp	dword [edi + ( 3*4)]		; autoc[3]+=d*data[sample+3]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 2*4)]		; ST = d*data[sample+2] d
-	fadd	dword [edi + ( 2*4)]		; ST = autoc[2]+d*data[sample+2] d
-	fstp	dword [edi + ( 2*4)]		; autoc[2]+=d*data[sample+2]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi + ( 1*4)]		; ST = d*data[sample+1] d
-	fadd	dword [edi + ( 1*4)]		; ST = autoc[1]+d*data[sample+1] d
-	fstp	dword [edi + ( 1*4)]		; autoc[1]+=d*data[sample+1]  ST = d
-	fld	st0				; ST = d d
-	fmul	dword [esi]			; ST = d*data[sample] d			WATCHOUT: no displacement byte here!
-	fadd	dword [edi]			; ST = autoc[0]+d*data[sample] d	WATCHOUT: no displacement byte here!
-	fstp	dword [edi]			; autoc[0]+=d*data[sample]  ST = d	WATCHOUT: no displacement byte here!
-.jumper2_0:
-
-	fstp	st0				; pop d, ST = empty
-	add	esi, byte 4			; sample++
-	dec	ecx
-	jz	.loop2_end
-	add	edx, byte 11			; adjust our inner loop counter by adjusting the jump target
-	fld	dword [esi]			; ST = d <- data[sample]
-	jmp	edx				; one fewer term than for the previous sample
-.loop2_end:
-
-.end:
-	pop	ebx				; restore saved registers in reverse order
-	pop	edi
-	pop	esi
-	ret
-
-	ALIGN 16
-cident FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_4
-	; Accumulates four autocorrelation sums in xmm5 (one per float lane) and
-	; stores all four floats to autoc[].  The lag argument is never read here.
-	;[esp + 16] == autoc[]
-	;[esp + 12] == lag
-	;[esp + 8] == data_len
-	;[esp + 4] == data[]
-
-	;ASSERT(lag > 0)
-	;ASSERT(lag <= 4)
-	;ASSERT(lag <= data_len)
-
-	;	for(coeff = 0; coeff < lag; coeff++)
-	;		autoc[coeff] = 0.0;
-	xorps	xmm5, xmm5
-
-	mov	edx, [esp + 8]			; edx == data_len
-	mov	eax, [esp + 4]			; eax == &data[sample] <- &data[0]
-
-	movss	xmm0, [eax]			; xmm0 = 0,0,0,data[0]
-	add	eax, 4
-	movaps	xmm2, xmm0			; xmm2 = 0,0,0,data[0]
-	shufps	xmm0, xmm0, 0			; xmm0 == data[sample],data[sample],data[sample],data[sample] = data[0],data[0],data[0],data[0]
-.warmup:					; xmm2 == data[sample-3],data[sample-2],data[sample-1],data[sample]
-	mulps	xmm0, xmm2			; xmm0 = xmm0 * xmm2
-	addps	xmm5, xmm0			; xmm5 += xmm0 * xmm2
-	dec	edx
-	jz	.loop_end
-	ALIGN 16
-.loop_start:
-	; start by reading the next sample
-	movss	xmm0, [eax]			; xmm0 = 0,0,0,data[sample]
-	add	eax, 4
-	shufps	xmm0, xmm0, 0			; xmm0 = data[sample],data[sample],data[sample],data[sample]
-	shufps	xmm2, xmm2, 93h			; 93h=2-1-0-3 => xmm2 gets rotated left by one float
-	; replace the lowest float of the window with the new sample; the other
-	; three floats keep the previous samples
-	movss	xmm2, xmm0
-	mulps	xmm0, xmm2			; xmm0 = xmm0 * xmm2
-	addps	xmm5, xmm0			; xmm5 += xmm0 * xmm2
-	dec	edx
-	jnz	.loop_start
-.loop_end:
-	; store autoc
-	mov	edx, [esp + 16]			; edx == autoc
-	movups	[edx], xmm5			; unaligned store of all 4 floats
-
-.end:
-	ret
-
-	ALIGN 16
-cident FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_8
-	; Accumulates eight autocorrelation sums in xmm6:xmm5 and stores all eight
-	; floats to autoc[].  The lag argument is never read here.
-	;[esp + 16] == autoc[]
-	;[esp + 12] == lag
-	;[esp + 8] == data_len
-	;[esp + 4] == data[]
-
-	;ASSERT(lag > 0)
-	;ASSERT(lag <= 8)
-	;ASSERT(lag <= data_len)
-
-	;	for(coeff = 0; coeff < lag; coeff++)
-	;		autoc[coeff] = 0.0;
-	xorps	xmm5, xmm5
-	xorps	xmm6, xmm6
-
-	mov	edx, [esp + 8]			; edx == data_len
-	mov	eax, [esp + 4]			; eax == &data[sample] <- &data[0]
-
-	movss	xmm0, [eax]			; xmm0 = 0,0,0,data[0]
-	add	eax, 4
-	movaps	xmm2, xmm0			; xmm2 = 0,0,0,data[0]
-	shufps	xmm0, xmm0, 0			; xmm0 == data[sample],data[sample],data[sample],data[sample] = data[0],data[0],data[0],data[0]
-	movaps	xmm1, xmm0			; xmm1 == data[sample],data[sample],data[sample],data[sample] = data[0],data[0],data[0],data[0]
-	xorps	xmm3, xmm3			; xmm3 = 0,0,0,0
-.warmup:					; xmm3:xmm2 == data[sample-7],data[sample-6],...,data[sample]
-	mulps	xmm0, xmm2
-	mulps	xmm1, xmm3			; xmm1:xmm0 = xmm1:xmm0 * xmm3:xmm2
-	addps	xmm5, xmm0
-	addps	xmm6, xmm1			; xmm6:xmm5 += xmm1:xmm0 * xmm3:xmm2
-	dec	edx
-	jz	.loop_end
-	ALIGN 16
-.loop_start:
-	; start by reading the next sample
-	movss	xmm0, [eax]			; xmm0 = 0,0,0,data[sample]
-	; here we reorder the instructions; see the (#) indexes for a logical order
-	shufps	xmm2, xmm2, 93h			; (3) 93h=2-1-0-3 => xmm2 gets rotated left by one float
-	add	eax, 4				; (0)
-	shufps	xmm3, xmm3, 93h			; (4) 93h=2-1-0-3 => xmm3 gets rotated left by one float
-	shufps	xmm0, xmm0, 0			; (1) xmm0 = data[sample],data[sample],data[sample],data[sample]
-	movss	xmm3, xmm2			; (5) carry the float rotated out of xmm2 into the low lane of xmm3
-	movaps	xmm1, xmm0			; (2) xmm1 = data[sample],data[sample],data[sample],data[sample]
-	movss	xmm2, xmm0			; (6) insert the new sample into the low lane of xmm2
-	mulps	xmm1, xmm3			; (8)
-	mulps	xmm0, xmm2			; (7) xmm1:xmm0 = xmm1:xmm0 * xmm3:xmm2
-	addps	xmm6, xmm1			; (10)
-	addps	xmm5, xmm0			; (9) xmm6:xmm5 += xmm1:xmm0 * xmm3:xmm2
-	dec	edx
-	jnz	.loop_start
-.loop_end:
-	; store autoc
-	mov	edx, [esp + 16]			; edx == autoc
-	movups	[edx], xmm5			; autoc[0..3]
-	movups	[edx + 16], xmm6		; autoc[4..7]
-
-.end:
-	ret
-
-	ALIGN 16
-cident FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_12
-	; Accumulates twelve autocorrelation sums in xmm7:xmm6:xmm5 and stores all
-	; twelve floats to autoc[].  The lag argument is never read here.
-	;[esp + 16] == autoc[]
-	;[esp + 12] == lag
-	;[esp + 8] == data_len
-	;[esp + 4] == data[]
-
-	;ASSERT(lag > 0)
-	;ASSERT(lag <= 12)
-	;ASSERT(lag <= data_len)
-
-	;	for(coeff = 0; coeff < lag; coeff++)
-	;		autoc[coeff] = 0.0;
-	xorps	xmm5, xmm5
-	xorps	xmm6, xmm6
-	xorps	xmm7, xmm7
-
-	mov	edx, [esp + 8]			; edx == data_len
-	mov	eax, [esp + 4]			; eax == &data[sample] <- &data[0]
-
-	movss	xmm0, [eax]			; xmm0 = 0,0,0,data[0]
-	add	eax, 4
-	movaps	xmm2, xmm0			; xmm2 = 0,0,0,data[0]
-	shufps	xmm0, xmm0, 0			; xmm0 == data[sample],data[sample],data[sample],data[sample] = data[0],data[0],data[0],data[0]
-	xorps	xmm3, xmm3			; xmm3 = 0,0,0,0
-	xorps	xmm4, xmm4			; xmm4 = 0,0,0,0
-.warmup:					; xmm4:xmm3:xmm2 == data[sample-11],data[sample-10],...,data[sample]
-	movaps	xmm1, xmm0			; xmm1 is scratch: xmm0 must survive for the later multiplies
-	mulps	xmm1, xmm2
-	addps	xmm5, xmm1
-	movaps	xmm1, xmm0
-	mulps	xmm1, xmm3
-	addps	xmm6, xmm1
-	mulps	xmm0, xmm4
-	addps	xmm7, xmm0			; xmm7:xmm6:xmm5 += xmm0:xmm0:xmm0 * xmm4:xmm3:xmm2
-	dec	edx
-	jz	.loop_end
-	ALIGN 16
-.loop_start:
-	; start by reading the next sample
-	movss	xmm0, [eax]			; xmm0 = 0,0,0,data[sample]
-	add	eax, 4
-	shufps	xmm0, xmm0, 0			; xmm0 = data[sample],data[sample],data[sample],data[sample]
-
-	; shift xmm4:xmm3:xmm2 left by one float
-	shufps	xmm2, xmm2, 93h			; 93h=2-1-0-3 => xmm2 gets rotated left by one float
-	shufps	xmm3, xmm3, 93h			; 93h=2-1-0-3 => xmm3 gets rotated left by one float
-	shufps	xmm4, xmm4, 93h			; 93h=2-1-0-3 => xmm4 gets rotated left by one float
-	movss	xmm4, xmm3			; carry from xmm3 into the low lane of xmm4
-	movss	xmm3, xmm2			; carry from xmm2 into the low lane of xmm3
-	movss	xmm2, xmm0			; insert the new sample into the low lane of xmm2
-
-	; xmm7:xmm6:xmm5 += xmm0:xmm0:xmm0 * xmm4:xmm3:xmm2
-	movaps	xmm1, xmm0
-	mulps	xmm1, xmm2
-	addps	xmm5, xmm1
-	movaps	xmm1, xmm0
-	mulps	xmm1, xmm3
-	addps	xmm6, xmm1
-	mulps	xmm0, xmm4
-	addps	xmm7, xmm0
-
-	dec	edx
-	jnz	.loop_start
-.loop_end:
-	; store autoc
-	mov	edx, [esp + 16]			; edx == autoc
-	movups	[edx], xmm5			; autoc[0..3]
-	movups	[edx + 16], xmm6		; autoc[4..7]
-	movups	[edx + 32], xmm7		; autoc[8..11]
-
-.end:
-	ret
-
-	ALIGN 16
-cident FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_16
-	; Accumulates sixteen autocorrelation sums and stores all sixteen floats to
-	; autoc[].  Sums 0..7 live in xmm6:xmm5; sums 8..15 live in two 16-byte
-	; stack slots at [esp] and [esp + 16].  The lag argument is never read here.
-	;[ebp + 20] == autoc[]
-	;[ebp + 16] == lag
-	;[ebp + 12] == data_len
-	;[ebp +  8] == data[]
-	;[esp] == __m128
-	;[esp + 16] == __m128
-
-	push	ebp
-	mov	ebp, esp
-	and	esp, -16 ; stack realign for SSE instructions 'movaps' and 'addps'
-	sub	esp, 32
-
-	;ASSERT(lag > 0)
-	;ASSERT(lag <= 16)
-	;ASSERT(lag <= data_len)
-	;ASSERT(data_len > 0)
-
-	;	for(coeff = 0; coeff < lag; coeff++)
-	;		autoc[coeff] = 0.0;
-	xorps	xmm5, xmm5
-	xorps	xmm6, xmm6
-	movaps	[esp], xmm5			; zero the accumulator for sums 8..11
-	movaps	[esp + 16], xmm6		; zero the accumulator for sums 12..15
-
-	mov	edx, [ebp + 12]			; edx == data_len
-	mov	eax, [ebp +  8]			; eax == &data[sample] <- &data[0]
-
-	movss	xmm0, [eax]			; xmm0 = 0,0,0,data[0]
-	add	eax, 4
-	movaps	xmm1, xmm0			; xmm1 = 0,0,0,data[0]
-	shufps	xmm0, xmm0, 0		; xmm0 == data[sample],data[sample],data[sample],data[sample] = data[0],data[0],data[0],data[0]
-	xorps	xmm2, xmm2			; xmm2 = 0,0,0,0
-	xorps	xmm3, xmm3			; xmm3 = 0,0,0,0
-	xorps	xmm4, xmm4			; xmm4 = 0,0,0,0
-	; first sample: only xmm1 is non-zero, so only xmm5 needs updating
-	movaps	xmm7, xmm0
-	mulps	xmm7, xmm1
-	addps	xmm5, xmm7
-	dec	edx
-	jz	.loop_end
-	ALIGN 16
-.loop_start:
-	; start by reading the next sample
-	movss	xmm0, [eax]				; xmm0 = 0,0,0,data[sample]
-	add	eax, 4
-	shufps	xmm0, xmm0, 0			; xmm0 = data[sample],data[sample],data[sample],data[sample]
-
-	; shift xmm4:xmm3:xmm2:xmm1 left by one float
-	shufps	xmm1, xmm1, 93h
-	shufps	xmm2, xmm2, 93h
-	shufps	xmm3, xmm3, 93h
-	shufps	xmm4, xmm4, 93h
-	movss	xmm4, xmm3
-	movss	xmm3, xmm2
-	movss	xmm2, xmm1
-	movss	xmm1, xmm0
-
-	; xmmB:xmmA:xmm6:xmm5 += xmm0:xmm0:xmm0:xmm0 * xmm4:xmm3:xmm2:xmm1
-	; (xmmA and xmmB are not registers: they stand for the stack slots
-	;  [esp] and [esp + 16] respectively; xmm7 is scratch)
-	movaps	xmm7, xmm0
-	mulps	xmm7, xmm1
-	addps	xmm5, xmm7
-	movaps	xmm7, xmm0
-	mulps	xmm7, xmm2
-	addps	xmm6, xmm7
-	movaps	xmm7, xmm0
-	mulps	xmm7, xmm3
-	mulps	xmm0, xmm4
-	addps	xmm7, [esp]
-	addps	xmm0, [esp + 16]
-	movaps	[esp], xmm7
-	movaps	[esp + 16], xmm0
-
-	dec	edx
-	jnz	.loop_start
-.loop_end:
-	; store autoc
-	mov	edx, [ebp + 20]				; edx == autoc
-	movups	[edx], xmm5
-	movups	[edx + 16], xmm6
-	movaps	xmm5, [esp]				; reload sums 8..11 from the stack
-	movaps	xmm6, [esp + 16]			; reload sums 12..15 from the stack
-	movups	[edx + 32], xmm5
-	movups	[edx + 48], xmm6
-.end:
-	mov	esp, ebp				; undo the realignment and the 32-byte reservation
-	pop	ebp
-	ret
-
-;void FLAC__lpc_compute_residual_from_qlp_coefficients(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
-;
-;	for(i = 0; i < data_len; i++) {
-;		sum = 0;
-;		for(j = 0; j < order; j++)
-;			sum += qlp_coeff[j] * data[i-j-1];
-;		residual[i] = data[i] - (sum >> lp_quantization);
-;	}
-;
-; Three code paths: order <= 1 (.i_1_loop_i), order > 32 (generic nested
-; loops) and 2 <= order <= 32 (computed jump into an unrolled chain).
-; The .begin label is also entered from the MMX variant with esi, edi, eax
-; and ebx already loaded.
-	ALIGN	16
-cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32
-	;[esp + 40]	residual[]
-	;[esp + 36]	lp_quantization
-	;[esp + 32]	order
-	;[esp + 28]	qlp_coeff[]
-	;[esp + 24]	data_len
-	;[esp + 20]	data[]
-
-	;ASSERT(order > 0)
-
-	push	ebp
-	push	ebx
-	push	esi
-	push	edi
-
-	mov	esi, [esp + 20]			; esi = data[]
-	mov	edi, [esp + 40]			; edi = residual[]
-	mov	eax, [esp + 32]			; eax = order
-	mov	ebx, [esp + 24]			; ebx = data_len
-
-	test	ebx, ebx
-	jz	near .end			; do nothing if data_len == 0
-.begin:
-	cmp	eax, byte 1
-	jg	short .i_1more
-
-	mov	ecx, [esp + 28]
-	mov	edx, [ecx]			; edx = qlp_coeff[0]
-	mov	eax, [esi - 4]			; eax = data[-1]
-	mov	ecx, [esp + 36]			; cl = lp_quantization
-	ALIGN	16
-.i_1_loop_i:
-	imul	eax, edx			; eax = qlp_coeff[0] * data[i-1]
-	sar	eax, cl				; eax = sum >> lp_quantization
-	neg	eax
-	add	eax, [esi]			; eax = data[i] - (sum >> lp_quantization)
-	mov	[edi], eax			; residual[i] = eax
-	mov	eax, [esi]			; eax = data[i], the previous sample for the next iteration
-	add	edi, byte 4
-	add	esi, byte 4
-	dec	ebx
-	jnz	.i_1_loop_i
-
-	jmp	.end
-
-.i_1more:
-	cmp	eax, byte 32			; for order <= 32 there is a faster routine
-	jbe	short .i_32
-
-	; This version is here just for completeness, since FLAC__MAX_LPC_ORDER == 32
-	ALIGN 16
-.i_32more_loop_i:
-	xor	ebp, ebp			; sum = 0
-	mov	ecx, [esp + 32]
-	mov	edx, ecx
-	shl	edx, 2
-	add	edx, [esp + 28]			; edx = &qlp_coeff[order]
-	neg	ecx				; ecx = -order, counts up to 0
-	ALIGN	16
-.i_32more_loop_j:
-	sub	edx, byte 4			; step back to the next lower coefficient
-	mov	eax, [edx]
-	imul	eax, [esi + 4 * ecx]		; multiply by the sample ecx positions from data[i] (ecx < 0)
-	add	ebp, eax
-	inc	ecx
-	jnz	short .i_32more_loop_j
-
-	mov	ecx, [esp + 36]
-	sar	ebp, cl
-	neg	ebp
-	add	ebp, [esi]
-	mov	[edi], ebp
-	add	esi, byte 4
-	add	edi, byte 4
-
-	dec	ebx
-	jnz	.i_32more_loop_i
-
-	jmp	.end
-
-	; helper: returns its own return address in eax, i.e. the address of
-	; the instruction that follows the call
-.mov_eip_to_eax:
-	mov	eax, [esp]
-	ret
-
-.i_32:
-	sub	edi, esi			; edi = residual[] - data[], so [edi + esi] addresses residual[i]
-	neg	eax				; eax = -order
-	; edx = (.jumper_0 - .get_eip0) - 9*order: the scale factor assumes each
-	; mov/imul/add group in the unrolled chain below assembles to 9 bytes
-	lea	edx, [eax + eax * 8 + .jumper_0 - .get_eip0]
-	call	.mov_eip_to_eax
-.get_eip0:
-	add	edx, eax			; make the jump target absolute (eax = address of .get_eip0)
-	inc	edx				; compensate for the one-byte-shorter 'mov ecx, [eax]' in the last group
-	mov	eax, [esp + 28]			; eax = qlp_coeff[]
-	xor	ebp, ebp
-	jmp	edx
-
-	; Unrolled chain, highest coefficient first: each group performs
-	; sum (ebp) += qlp_coeff[j] * data[i-j-1].  Execution enters at the
-	; group selected by edx and falls through to .jumper_0.
-	mov	ecx, [eax + 124]
-	imul	ecx, [esi - 128]
-	add	ebp, ecx
-	mov	ecx, [eax + 120]
-	imul	ecx, [esi - 124]
-	add	ebp, ecx
-	mov	ecx, [eax + 116]
-	imul	ecx, [esi - 120]
-	add	ebp, ecx
-	mov	ecx, [eax + 112]
-	imul	ecx, [esi - 116]
-	add	ebp, ecx
-	mov	ecx, [eax + 108]
-	imul	ecx, [esi - 112]
-	add	ebp, ecx
-	mov	ecx, [eax + 104]
-	imul	ecx, [esi - 108]
-	add	ebp, ecx
-	mov	ecx, [eax + 100]
-	imul	ecx, [esi - 104]
-	add	ebp, ecx
-	mov	ecx, [eax + 96]
-	imul	ecx, [esi - 100]
-	add	ebp, ecx
-	mov	ecx, [eax + 92]
-	imul	ecx, [esi - 96]
-	add	ebp, ecx
-	mov	ecx, [eax + 88]
-	imul	ecx, [esi - 92]
-	add	ebp, ecx
-	mov	ecx, [eax + 84]
-	imul	ecx, [esi - 88]
-	add	ebp, ecx
-	mov	ecx, [eax + 80]
-	imul	ecx, [esi - 84]
-	add	ebp, ecx
-	mov	ecx, [eax + 76]
-	imul	ecx, [esi - 80]
-	add	ebp, ecx
-	mov	ecx, [eax + 72]
-	imul	ecx, [esi - 76]
-	add	ebp, ecx
-	mov	ecx, [eax + 68]
-	imul	ecx, [esi - 72]
-	add	ebp, ecx
-	mov	ecx, [eax + 64]
-	imul	ecx, [esi - 68]
-	add	ebp, ecx
-	mov	ecx, [eax + 60]
-	imul	ecx, [esi - 64]
-	add	ebp, ecx
-	mov	ecx, [eax + 56]
-	imul	ecx, [esi - 60]
-	add	ebp, ecx
-	mov	ecx, [eax + 52]
-	imul	ecx, [esi - 56]
-	add	ebp, ecx
-	mov	ecx, [eax + 48]
-	imul	ecx, [esi - 52]
-	add	ebp, ecx
-	mov	ecx, [eax + 44]
-	imul	ecx, [esi - 48]
-	add	ebp, ecx
-	mov	ecx, [eax + 40]
-	imul	ecx, [esi - 44]
-	add	ebp, ecx
-	mov	ecx, [eax + 36]
-	imul	ecx, [esi - 40]
-	add	ebp, ecx
-	mov	ecx, [eax + 32]
-	imul	ecx, [esi - 36]
-	add	ebp, ecx
-	mov	ecx, [eax + 28]
-	imul	ecx, [esi - 32]
-	add	ebp, ecx
-	mov	ecx, [eax + 24]
-	imul	ecx, [esi - 28]
-	add	ebp, ecx
-	mov	ecx, [eax + 20]
-	imul	ecx, [esi - 24]
-	add	ebp, ecx
-	mov	ecx, [eax + 16]
-	imul	ecx, [esi - 20]
-	add	ebp, ecx
-	mov	ecx, [eax + 12]
-	imul	ecx, [esi - 16]
-	add	ebp, ecx
-	mov	ecx, [eax + 8]
-	imul	ecx, [esi - 12]
-	add	ebp, ecx
-	mov	ecx, [eax + 4]
-	imul	ecx, [esi - 8]
-	add	ebp, ecx
-	mov	ecx, [eax]			; no displacement byte, so this instruction is one byte shorter than the others
-	imul	ecx, [esi - 4]
-	add	ebp, ecx
-.jumper_0:
-
-	mov	ecx, [esp + 36]			; cl = lp_quantization
-	sar	ebp, cl				; ebp = sum >> lp_quantization
-	neg	ebp
-	add	ebp, [esi]			; ebp = data[i] - (sum >> lp_quantization)
-	mov	[edi + esi], ebp		; residual[i] = ebp (edi holds residual[] - data[])
-	add	esi, byte 4
-
-	dec	ebx
-	jz	short .end
-	xor	ebp, ebp			; sum = 0 for the next sample
-	jmp	edx				; re-enter the unrolled chain at the same entry point
-
-.end:
-	pop	edi
-	pop	esi
-	pop	ebx
-	pop	ebp
-	ret
-
-; WATCHOUT: this routine works on 16 bit data which means bits-per-sample for
-; the channel and qlp_coeffs must be <= 16.  Especially note that this routine
-; cannot be used for side-channel coded 16bps channels since the effective bps
-; is 17.
-;
-; Outline: the low 16 bits of each qlp_coeff are copied to a zero-padded table
-; on the stack, then the MMX loops produce two residuals per iteration.  A
-; final odd sample (or data_len == 1) is handed to the .begin label of
-; FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32.
-	ALIGN	16
-cident FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx
-	;[esp + 40]	residual[]
-	;[esp + 36]	lp_quantization
-	;[esp + 32]	order
-	;[esp + 28]	qlp_coeff[]
-	;[esp + 24]	data_len
-	;[esp + 20]	data[]
-
-	;ASSERT(order > 0)
-
-	push	ebp
-	push	ebx
-	push	esi
-	push	edi
-
-	mov	esi, [esp + 20]			; esi = data[]
-	mov	edi, [esp + 40]			; edi = residual[]
-	mov	eax, [esp + 32]			; eax = order
-	mov	ebx, [esp + 24]			; ebx = data_len
-
-	test	ebx, ebx
-	jz	near .end			; do nothing if data_len == 0
-	dec	ebx				; ebx = data_len - 1
-	test	ebx, ebx
-	jz	near .last_one			; a single sample goes straight to the non-MMX code
-
-	mov	edx, [esp + 28]			; edx = qlp_coeff[]
-	movd	mm6, [esp + 36]			; mm6 = 0:lp_quantization
-	mov	ebp, esp			; remember esp; it is restored at .mmx_end
-
-	and	esp, 0xfffffff8			; round esp down to a multiple of 8
-
-	; push the low 16 bits of qlp_coeff[0], qlp_coeff[1], ... in that order
-	xor	ecx, ecx
-.copy_qlp_loop:
-	push	word [edx + 4 * ecx]
-	inc	ecx
-	cmp	ecx, eax
-	jnz	short .copy_qlp_loop
-
-	; pad with zero words until the count is a multiple of 4; eax is raised
-	; to the padded count
-	and	ecx, 0x3
-	test	ecx, ecx
-	je	short .za_end
-	sub	ecx, byte 4
-.za_loop:
-	push	word 0
-	inc	eax
-	inc	ecx
-	jnz	short .za_loop
-.za_end:
-
-	movq	mm5, [esp + 2 * eax - 8]	; mm5 = the four words pushed first: qlp_coeff[0..3], qlp_coeff[0] in the highest word
-	movd	mm4, [esi - 16]
-	punpckldq	mm4, [esi - 12]
-	movd	mm0, [esi - 8]
-	punpckldq	mm0, [esi - 4]
-	packssdw	mm4, mm0		; mm4 = data[-4..-1] packed to signed 16 bit, data[-1] in the highest word
-
-	cmp	eax, byte 4
-	jnbe	short .mmx_4more		; padded order > 4 needs the inner loop
-
-	ALIGN	16
-.mmx_4_loop_i:
-	movd	mm1, [esi]
-	movq	mm3, mm4
-	punpckldq	mm1, [esi + 4]		; mm1 = data[i+1]:data[i]
-	psrlq	mm4, 16
-	movq	mm0, mm1
-	psllq	mm0, 48
-	por	mm4, mm0			; history window advanced by one sample (low 16 bits of data[i] on top)
-	movq	mm2, mm4
-	psrlq	mm4, 16
-	pxor	mm0, mm0
-	punpckhdq	mm0, mm1
-	pmaddwd	mm3, mm5			; partial sums for sample i
-	pmaddwd	mm2, mm5			; partial sums for sample i+1
-	psllq	mm0, 16
-	por	mm4, mm0			; history window advanced again (data[i+1] on top)
-	movq	mm0, mm3
-	punpckldq	mm3, mm2
-	punpckhdq	mm0, mm2
-	paddd	mm3, mm0			; mm3 = sum(i+1):sum(i)
-	psrad	mm3, mm6			; >> lp_quantization
-	psubd	mm1, mm3			; mm1 = data - (sum >> lp_quantization) for both samples
-	movd	[edi], mm1
-	punpckhdq	mm1, mm1
-	movd	[edi + 4], mm1
-
-	add	edi, byte 8
-	add	esi, byte 8
-
-	sub	ebx, 2
-	jg	.mmx_4_loop_i
-	jmp	.mmx_end
-
-.mmx_4more:
-	shl	eax, 2
-	neg	eax
-	add	eax, byte 16			; eax = 16 - 4 * (padded order)
-
-	ALIGN	16
-.mmx_4more_loop_i:
-	movd	mm1, [esi]
-	punpckldq	mm1, [esi + 4]
-	movq	mm3, mm4
-	psrlq	mm4, 16
-	movq	mm0, mm1
-	psllq	mm0, 48
-	por	mm4, mm0
-	movq	mm2, mm4
-	psrlq	mm4, 16
-	pxor	mm0, mm0
-	punpckhdq	mm0, mm1
-	pmaddwd	mm3, mm5
-	pmaddwd	mm2, mm5
-	psllq	mm0, 16
-	por	mm4, mm0
-
-	mov	ecx, esi
-	add	ecx, eax			; ecx walks the older samples in 16-byte steps until it reaches esi
-	mov	edx, esp			; edx walks the coefficient table from its lowest address
-
-	ALIGN	16
-.mmx_4more_loop_j:
-	movd	mm0, [ecx - 16]
-	movd	mm7, [ecx - 8]
-	punpckldq	mm0, [ecx - 12]
-	punpckldq	mm7, [ecx - 4]
-	packssdw	mm0, mm7
-	pmaddwd	mm0, [edx]
-	punpckhdq	mm7, mm7
-	paddd	mm3, mm0			; accumulate into the sums for sample i
-	movd	mm0, [ecx - 12]
-	punpckldq	mm0, [ecx - 8]
-	punpckldq	mm7, [ecx]
-	packssdw	mm0, mm7
-	pmaddwd	mm0, [edx]
-	paddd	mm2, mm0			; accumulate into the sums for sample i+1
-
-	add	edx, byte 8
-	add	ecx, byte 16
-	cmp	ecx, esi
-	jnz	.mmx_4more_loop_j
-
-	movq	mm0, mm3
-	punpckldq	mm3, mm2
-	punpckhdq	mm0, mm2
-	paddd	mm3, mm0
-	psrad	mm3, mm6
-	psubd	mm1, mm3
-	movd	[edi], mm1
-	punpckhdq	mm1, mm1
-	movd	[edi + 4], mm1
-
-	add	edi, byte 8
-	add	esi, byte 8
-
-	sub	ebx, 2
-	jg	near .mmx_4more_loop_i
-
-.mmx_end:
-	emms
-	mov	esp, ebp			; drop the coefficient table and the alignment padding
-.last_one:
-	mov	eax, [esp + 32]			; eax = order again (it was modified above)
-	inc	ebx				; ebx is now the number of samples still to do (0 or 1)
-	jnz	near FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32.begin
-
-.end:
-	pop	edi
-	pop	esi
-	pop	ebx
-	pop	ebp
-	ret
-
-; **********************************************************************
-;
-; void FLAC__lpc_restore_signal(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[])
-; {
-; 	unsigned i, j;
-; 	FLAC__int32 sum;
-;
-; 	FLAC__ASSERT(order > 0);
-;
-; 	for(i = 0; i < data_len; i++) {
-; 		sum = 0;
-; 		for(j = 0; j < order; j++)
-; 			sum += qlp_coeff[j] * data[i-j-1];
-; 		data[i] = residual[i] + (sum >> lp_quantization);
-; 	}
-; }
-;
-; Three code paths: order <= 1, order > 32 (generic nested loops) and
-; 2 <= order <= 32 (computed jump into an unrolled chain).  Despite the
-; .x87_ label prefix, only integer instructions are used in this routine.
-; The .begin label is also entered from the MMX variant with esi, edi, eax
-; and ebx already loaded.
-	ALIGN	16
-cident FLAC__lpc_restore_signal_asm_ia32
-	;[esp + 40]	data[]
-	;[esp + 36]	lp_quantization
-	;[esp + 32]	order
-	;[esp + 28]	qlp_coeff[]
-	;[esp + 24]	data_len
-	;[esp + 20]	residual[]
-
-	;ASSERT(order > 0)
-
-	push	ebp
-	push	ebx
-	push	esi
-	push	edi
-
-	mov	esi, [esp + 20]			; esi = residual[]
-	mov	edi, [esp + 40]			; edi = data[]
-	mov	eax, [esp + 32]			; eax = order
-	mov	ebx, [esp + 24]			; ebx = data_len
-
-	test	ebx, ebx
-	jz	near .end			; do nothing if data_len == 0
-
-.begin:
-	cmp	eax, byte 1
-	jg	short .x87_1more
-
-	mov	ecx, [esp + 28]
-	mov	edx, [ecx]			; edx = qlp_coeff[0]
-	mov	eax, [edi - 4]			; eax = data[-1]
-	mov	ecx, [esp + 36]			; cl = lp_quantization
-	ALIGN	16
-.x87_1_loop_i:
-	imul	eax, edx			; eax = qlp_coeff[0] * data[i-1]
-	sar	eax, cl
-	add	eax, [esi]			; eax = residual[i] + (sum >> lp_quantization)
-	mov	[edi], eax			; data[i] = eax; eax stays live as data[i-1] of the next iteration
-	add	esi, byte 4
-	add	edi, byte 4
-	dec	ebx
-	jnz	.x87_1_loop_i
-
-	jmp	.end
-
-.x87_1more:
-	cmp	eax, byte 32			; for order <= 32 there is a faster routine
-	jbe	short .x87_32
-
-	; This version is here just for completeness, since FLAC__MAX_LPC_ORDER == 32
-	ALIGN 16
-.x87_32more_loop_i:
-	xor	ebp, ebp			; sum = 0
-	mov	ecx, [esp + 32]
-	mov	edx, ecx
-	shl	edx, 2
-	add	edx, [esp + 28]			; edx = &qlp_coeff[order]
-	neg	ecx				; ecx = -order, counts up to 0
-	ALIGN	16
-.x87_32more_loop_j:
-	sub	edx, byte 4
-	mov	eax, [edx]
-	imul	eax, [edi + 4 * ecx]
-	add	ebp, eax
-	inc	ecx
-	jnz	short .x87_32more_loop_j
-
-	mov	ecx, [esp + 36]
-	sar	ebp, cl
-	add	ebp, [esi]
-	mov	[edi], ebp
-	add	edi, byte 4
-	add	esi, byte 4
-
-	dec	ebx
-	jnz	.x87_32more_loop_i
-
-	jmp	.end
-
-	; helper: returns its own return address in eax, i.e. the address of
-	; the instruction that follows the call
-.mov_eip_to_eax:
-	mov	eax, [esp]
-	ret
-
-.x87_32:
-	sub	esi, edi			; esi = residual[] - data[], so [esi + edi] addresses residual[i]
-	neg	eax				; eax = -order
-	; edx = (.jumper_0 - .get_eip0) - 9*order: the scale factor assumes each
-	; mov/imul/add group in the unrolled chain below assembles to 9 bytes
-	lea	edx, [eax + eax * 8 + .jumper_0 - .get_eip0]
-	call	.mov_eip_to_eax
-.get_eip0:
-	add	edx, eax			; make the jump target absolute (eax = address of .get_eip0)
-	inc	edx				; compensate for the shorter opcode on the last iteration
-	mov	eax, [esp + 28]			; eax = qlp_coeff[]
-	xor	ebp, ebp
-	jmp	edx
-
-	mov	ecx, [eax + 124]		; ecx =  qlp_coeff[31]
-	imul	ecx, [edi - 128]		; ecx =  qlp_coeff[31] * data[i-32]
-	add	ebp, ecx			; sum += qlp_coeff[31] * data[i-32]
-	mov	ecx, [eax + 120]		; ecx =  qlp_coeff[30]
-	imul	ecx, [edi - 124]		; ecx =  qlp_coeff[30] * data[i-31]
-	add	ebp, ecx			; sum += qlp_coeff[30] * data[i-31]
-	mov	ecx, [eax + 116]		; ecx =  qlp_coeff[29]
-	imul	ecx, [edi - 120]		; ecx =  qlp_coeff[29] * data[i-30]
-	add	ebp, ecx			; sum += qlp_coeff[29] * data[i-30]
-	mov	ecx, [eax + 112]		; ecx =  qlp_coeff[28]
-	imul	ecx, [edi - 116]		; ecx =  qlp_coeff[28] * data[i-29]
-	add	ebp, ecx			; sum += qlp_coeff[28] * data[i-29]
-	mov	ecx, [eax + 108]		; ecx =  qlp_coeff[27]
-	imul	ecx, [edi - 112]		; ecx =  qlp_coeff[27] * data[i-28]
-	add	ebp, ecx			; sum += qlp_coeff[27] * data[i-28]
-	mov	ecx, [eax + 104]		; ecx =  qlp_coeff[26]
-	imul	ecx, [edi - 108]		; ecx =  qlp_coeff[26] * data[i-27]
-	add	ebp, ecx			; sum += qlp_coeff[26] * data[i-27]
-	mov	ecx, [eax + 100]		; ecx =  qlp_coeff[25]
-	imul	ecx, [edi - 104]		; ecx =  qlp_coeff[25] * data[i-26]
-	add	ebp, ecx			; sum += qlp_coeff[25] * data[i-26]
-	mov	ecx, [eax + 96]			; ecx =  qlp_coeff[24]
-	imul	ecx, [edi - 100]		; ecx =  qlp_coeff[24] * data[i-25]
-	add	ebp, ecx			; sum += qlp_coeff[24] * data[i-25]
-	mov	ecx, [eax + 92]			; ecx =  qlp_coeff[23]
-	imul	ecx, [edi - 96]			; ecx =  qlp_coeff[23] * data[i-24]
-	add	ebp, ecx			; sum += qlp_coeff[23] * data[i-24]
-	mov	ecx, [eax + 88]			; ecx =  qlp_coeff[22]
-	imul	ecx, [edi - 92]			; ecx =  qlp_coeff[22] * data[i-23]
-	add	ebp, ecx			; sum += qlp_coeff[22] * data[i-23]
-	mov	ecx, [eax + 84]			; ecx =  qlp_coeff[21]
-	imul	ecx, [edi - 88]			; ecx =  qlp_coeff[21] * data[i-22]
-	add	ebp, ecx			; sum += qlp_coeff[21] * data[i-22]
-	mov	ecx, [eax + 80]			; ecx =  qlp_coeff[20]
-	imul	ecx, [edi - 84]			; ecx =  qlp_coeff[20] * data[i-21]
-	add	ebp, ecx			; sum += qlp_coeff[20] * data[i-21]
-	mov	ecx, [eax + 76]			; ecx =  qlp_coeff[19]
-	imul	ecx, [edi - 80]			; ecx =  qlp_coeff[19] * data[i-20]
-	add	ebp, ecx			; sum += qlp_coeff[19] * data[i-20]
-	mov	ecx, [eax + 72]			; ecx =  qlp_coeff[18]
-	imul	ecx, [edi - 76]			; ecx =  qlp_coeff[18] * data[i-19]
-	add	ebp, ecx			; sum += qlp_coeff[18] * data[i-19]
-	mov	ecx, [eax + 68]			; ecx =  qlp_coeff[17]
-	imul	ecx, [edi - 72]			; ecx =  qlp_coeff[17] * data[i-18]
-	add	ebp, ecx			; sum += qlp_coeff[17] * data[i-18]
-	mov	ecx, [eax + 64]			; ecx =  qlp_coeff[16]
-	imul	ecx, [edi - 68]			; ecx =  qlp_coeff[16] * data[i-17]
-	add	ebp, ecx			; sum += qlp_coeff[16] * data[i-17]
-	mov	ecx, [eax + 60]			; ecx =  qlp_coeff[15]
-	imul	ecx, [edi - 64]			; ecx =  qlp_coeff[15] * data[i-16]
-	add	ebp, ecx			; sum += qlp_coeff[15] * data[i-16]
-	mov	ecx, [eax + 56]			; ecx =  qlp_coeff[14]
-	imul	ecx, [edi - 60]			; ecx =  qlp_coeff[14] * data[i-15]
-	add	ebp, ecx			; sum += qlp_coeff[14] * data[i-15]
-	mov	ecx, [eax + 52]			; ecx =  qlp_coeff[13]
-	imul	ecx, [edi - 56]			; ecx =  qlp_coeff[13] * data[i-14]
-	add	ebp, ecx			; sum += qlp_coeff[13] * data[i-14]
-	mov	ecx, [eax + 48]			; ecx =  qlp_coeff[12]
-	imul	ecx, [edi - 52]			; ecx =  qlp_coeff[12] * data[i-13]
-	add	ebp, ecx			; sum += qlp_coeff[12] * data[i-13]
-	mov	ecx, [eax + 44]			; ecx =  qlp_coeff[11]
-	imul	ecx, [edi - 48]			; ecx =  qlp_coeff[11] * data[i-12]
-	add	ebp, ecx			; sum += qlp_coeff[11] * data[i-12]
-	mov	ecx, [eax + 40]			; ecx =  qlp_coeff[10]
-	imul	ecx, [edi - 44]			; ecx =  qlp_coeff[10] * data[i-11]
-	add	ebp, ecx			; sum += qlp_coeff[10] * data[i-11]
-	mov	ecx, [eax + 36]			; ecx =  qlp_coeff[ 9]
-	imul	ecx, [edi - 40]			; ecx =  qlp_coeff[ 9] * data[i-10]
-	add	ebp, ecx			; sum += qlp_coeff[ 9] * data[i-10]
-	mov	ecx, [eax + 32]			; ecx =  qlp_coeff[ 8]
-	imul	ecx, [edi - 36]			; ecx =  qlp_coeff[ 8] * data[i- 9]
-	add	ebp, ecx			; sum += qlp_coeff[ 8] * data[i- 9]
-	mov	ecx, [eax + 28]			; ecx =  qlp_coeff[ 7]
-	imul	ecx, [edi - 32]			; ecx =  qlp_coeff[ 7] * data[i- 8]
-	add	ebp, ecx			; sum += qlp_coeff[ 7] * data[i- 8]
-	mov	ecx, [eax + 24]			; ecx =  qlp_coeff[ 6]
-	imul	ecx, [edi - 28]			; ecx =  qlp_coeff[ 6] * data[i- 7]
-	add	ebp, ecx			; sum += qlp_coeff[ 6] * data[i- 7]
-	mov	ecx, [eax + 20]			; ecx =  qlp_coeff[ 5]
-	imul	ecx, [edi - 24]			; ecx =  qlp_coeff[ 5] * data[i- 6]
-	add	ebp, ecx			; sum += qlp_coeff[ 5] * data[i- 6]
-	mov	ecx, [eax + 16]			; ecx =  qlp_coeff[ 4]
-	imul	ecx, [edi - 20]			; ecx =  qlp_coeff[ 4] * data[i- 5]
-	add	ebp, ecx			; sum += qlp_coeff[ 4] * data[i- 5]
-	mov	ecx, [eax + 12]			; ecx =  qlp_coeff[ 3]
-	imul	ecx, [edi - 16]			; ecx =  qlp_coeff[ 3] * data[i- 4]
-	add	ebp, ecx			; sum += qlp_coeff[ 3] * data[i- 4]
-	mov	ecx, [eax + 8]			; ecx =  qlp_coeff[ 2]
-	imul	ecx, [edi - 12]			; ecx =  qlp_coeff[ 2] * data[i- 3]
-	add	ebp, ecx			; sum += qlp_coeff[ 2] * data[i- 3]
-	mov	ecx, [eax + 4]			; ecx =  qlp_coeff[ 1]
-	imul	ecx, [edi - 8]			; ecx =  qlp_coeff[ 1] * data[i- 2]
-	add	ebp, ecx			; sum += qlp_coeff[ 1] * data[i- 2]
-	mov	ecx, [eax]			; ecx =  qlp_coeff[ 0] (NOTE: one byte missing from instruction)
-	imul	ecx, [edi - 4]			; ecx =  qlp_coeff[ 0] * data[i- 1]
-	add	ebp, ecx			; sum += qlp_coeff[ 0] * data[i- 1]
-.jumper_0:
-
-	mov	ecx, [esp + 36]			; cl = lp_quantization
-	sar	ebp, cl				; ebp = (sum >> lp_quantization)
-	add	ebp, [esi + edi]		; ebp = residual[i] + (sum >> lp_quantization)
-	mov	[edi], ebp			; data[i] = residual[i] + (sum >> lp_quantization)
-	add	edi, byte 4
-
-	dec	ebx
-	jz	short .end
-	xor	ebp, ebp			; sum = 0 for the next sample
-	jmp	edx				; re-enter the unrolled chain at the same entry point
-
-.end:
-	pop	edi
-	pop	esi
-	pop	ebx
-	pop	ebp
-	ret
-
-; WATCHOUT: this routine works on 16 bit data which means bits-per-sample for
-; the channel and qlp_coeffs must be <= 16.  Especially note that this routine
-; cannot be used for side-channel coded 16bps channels since the effective bps
-; is 17.
-; WATCHOUT: this routine requires that each data array have a buffer of up to
-; 3 zeroes in front (at negative indices) for alignment purposes, i.e. for each
-; channel n, data[n][-1] through data[n][-3] should be accessible and zero.
-;
-; Outline: orders below 4 are handed to the .begin label of
-; FLAC__lpc_restore_signal_asm_ia32.  Otherwise the low 16 bits of each
-; qlp_coeff are copied to a zero-padded table on the stack and one sample is
-; reconstructed per loop iteration.
-	ALIGN	16
-cident FLAC__lpc_restore_signal_asm_ia32_mmx
-	;[esp + 40]	data[]
-	;[esp + 36]	lp_quantization
-	;[esp + 32]	order
-	;[esp + 28]	qlp_coeff[]
-	;[esp + 24]	data_len
-	;[esp + 20]	residual[]
-
-	;ASSERT(order > 0)
-
-	push	ebp
-	push	ebx
-	push	esi
-	push	edi
-
-	mov	esi, [esp + 20]			; esi = residual[]
-	mov	edi, [esp + 40]			; edi = data[]
-	mov	eax, [esp + 32]			; eax = order
-	mov	ebx, [esp + 24]			; ebx = data_len
-
-	test	ebx, ebx
-	jz	near .end			; do nothing if data_len == 0
-	cmp	eax, byte 4
-	jb	near FLAC__lpc_restore_signal_asm_ia32.begin	; order < 4: use the non-MMX code
-
-	mov	edx, [esp + 28]			; edx = qlp_coeff[]
-	movd	mm6, [esp + 36]			; mm6 = 0:lp_quantization
-	mov	ebp, esp			; remember esp; it is restored at .mmx_end
-
-	and	esp, 0xfffffff8			; round esp down to a multiple of 8
-
-	; push the low 16 bits of qlp_coeff[0], qlp_coeff[1], ... in that order
-	xor	ecx, ecx
-.copy_qlp_loop:
-	push	word [edx + 4 * ecx]
-	inc	ecx
-	cmp	ecx, eax
-	jnz	short .copy_qlp_loop
-
-	; pad with zero words until the count is a multiple of 4; eax is raised
-	; to the padded count
-	and	ecx, 0x3
-	test	ecx, ecx
-	je	short .za_end
-	sub	ecx, byte 4
-.za_loop:
-	push	word 0
-	inc	eax
-	inc	ecx
-	jnz	short .za_loop
-.za_end:
-
-	movq	mm5, [esp + 2 * eax - 8]	; mm5 = the four words pushed first: qlp_coeff[0..3], qlp_coeff[0] in the highest word
-	movd	mm4, [edi - 16]
-	punpckldq	mm4, [edi - 12]
-	movd	mm0, [edi - 8]
-	punpckldq	mm0, [edi - 4]
-	packssdw	mm4, mm0		; mm4 = data[-4..-1] packed to signed 16 bit, data[-1] in the highest word
-
-	cmp	eax, byte 4
-	jnbe	short .mmx_4more		; padded order > 4 needs the inner loop
-
-	ALIGN	16
-.mmx_4_loop_i:
-	movq	mm7, mm4
-	pmaddwd	mm7, mm5			; two partial sums of coefficient * sample products
-	movq	mm0, mm7
-	punpckhdq	mm7, mm7
-	paddd	mm7, mm0			; low dword of mm7 = sum
-	psrad	mm7, mm6			; >> lp_quantization
-	movd	mm1, [esi]
-	paddd	mm7, mm1			; + residual[i]
-	movd	[edi], mm7			; data[i] = low dword of mm7
-	psllq	mm7, 48
-	psrlq	mm4, 16
-	por	mm4, mm7			; shift the new sample's low 16 bits into the top of the history window
-
-	add	esi, byte 4
-	add	edi, byte 4
-
-	dec	ebx
-	jnz	.mmx_4_loop_i
-	jmp	.mmx_end
-.mmx_4more:
-	shl	eax, 2
-	neg	eax
-	add	eax, byte 16			; eax = 16 - 4 * (padded order)
-	ALIGN	16
-.mmx_4more_loop_i:
-	mov	ecx, edi
-	add	ecx, eax			; ecx walks the older samples in 16-byte steps until it reaches edi
-	mov	edx, esp			; edx walks the coefficient table from its lowest address
-
-	movq	mm7, mm4
-	pmaddwd	mm7, mm5			; contribution of the four most recent samples
-
-	ALIGN	16
-.mmx_4more_loop_j:
-	movd	mm0, [ecx - 16]
-	punpckldq	mm0, [ecx - 12]
-	movd	mm1, [ecx - 8]
-	punpckldq	mm1, [ecx - 4]
-	packssdw	mm0, mm1
-	pmaddwd	mm0, [edx]
-	paddd	mm7, mm0
-
-	add	edx, byte 8
-	add	ecx, byte 16
-	cmp	ecx, edi
-	jnz	.mmx_4more_loop_j
-
-	movq	mm0, mm7
-	punpckhdq	mm7, mm7
-	paddd	mm7, mm0
-	psrad	mm7, mm6
-	movd	mm1, [esi]
-	paddd	mm7, mm1
-	movd	[edi], mm7
-	psllq	mm7, 48
-	psrlq	mm4, 16
-	por	mm4, mm7
-
-	add	esi, byte 4
-	add	edi, byte 4
-
-	dec	ebx
-	jnz	short .mmx_4more_loop_i
-.mmx_end:
-	emms
-	mov	esp, ebp			; drop the coefficient table and the alignment padding
-
-.end:
-	pop	edi
-	pop	esi
-	pop	ebx
-	pop	ebp
-	ret
-
-
-; **********************************************************************
-;
-;void FLAC__lpc_compute_residual_from_qlp_coefficients_wide(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
-; {
-; 	unsigned i, j;
-; 	FLAC__int64 sum;
-;
-; 	FLAC__ASSERT(order > 0);
-;
-;	for(i = 0; i < data_len; i++) {
-;		sum = 0;
-;		for(j = 0; j < order; j++)
-;			sum += qlp_coeff[j] * (FLAC__int64)data[i-j-1];
-;		residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
-;	}
-; }
-;
-; Two code paths: order <= 1 (.i_1_loop_i) and order >= 2 (computed jump into
-; an unrolled chain that keeps the 64-bit sum in esi:ecx).  The second path
-; overwrites two of its own argument slots on the stack: [esp + 40] becomes
-; residual[] - data[] and [esp + 24] (data_len) is used as the loop counter.
-	ALIGN	16
-cident FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32
-	;[esp + 40]	residual[]
-	;[esp + 36]	lp_quantization
-	;[esp + 32]	order
-	;[esp + 28]	qlp_coeff[]
-	;[esp + 24]	data_len
-	;[esp + 20]	data[]
-
-	;ASSERT(order > 0)
-	;ASSERT(order <= 32)
-	;ASSERT(lp_quantization <= 31)
-
-	push	ebp
-	push	ebx
-	push	esi
-	push	edi
-
-	mov	ebx, [esp + 24]			; ebx = data_len
-	test	ebx, ebx
-	jz	near .end				; do nothing if data_len == 0
-
-.begin:
-	mov	eax, [esp + 32]			; eax = order
-	cmp	eax, 1
-	jg	short .i_32
-
-	mov	esi, [esp + 40]			; esi = residual[]
-	mov	edi, [esp + 20]			; edi = data[]
-	mov	ecx, [esp + 28]			; ecx = qlp_coeff[]
-	mov	ebp, [ecx]				; ebp = qlp_coeff[0]
-	mov	eax, [edi - 4]			; eax = data[-1]
-	mov	ecx, [esp + 36]			; cl = lp_quantization
-	ALIGN	16
-.i_1_loop_i:
-	imul	ebp					; edx:eax = qlp_coeff[0] * (FLAC__int64)data[i-1]
-	; eax = low dword of (edx:eax >> cl)
-	; NOTE(review): the trailing comment below claims lp_quantization <= 15,
-	; but the ASSERT at the top of this routine allows up to 31 - confirm.
-	shrd	eax, edx, cl		; 0 <= lp_quantization <= 15
-	neg	eax
-	add	eax, [edi]				; eax = data[i] - (sum >> lp_quantization)
-	mov	[esi], eax				; residual[i] = eax
-	mov	eax, [edi]				; eax = data[i], the previous sample for the next iteration
-	add	esi, 4
-	add	edi, 4
-	dec	ebx
-	jnz	.i_1_loop_i
-	jmp	.end
-
-	; helper: returns its own return address in eax, i.e. the address of
-	; the instruction that follows the call
-.mov_eip_to_eax:
-	mov	eax, [esp]
-	ret
-
-.i_32:	; eax = order
-	neg	eax
-	add	eax, eax				; eax = -2 * order
-	; ebp = (.jumper_0 - .get_eip0) - 10*order: the scale factor assumes each
-	; mov/imul/add/adc group in the unrolled chain below assembles to 10 bytes
-	lea	ebp, [eax + eax * 4 + .jumper_0 - .get_eip0]
-	call	.mov_eip_to_eax
-.get_eip0:
-	add	ebp, eax				; make the jump target absolute (eax = address of .get_eip0)
-	inc	ebp				; compensate for the shorter opcode on the last iteration
-
-	mov	ebx, [esp + 28]			; ebx = qlp_coeff[]
-	mov	edi, [esp + 20]			; edi = data[]
-	sub	[esp + 40], edi			; residual[] -= data[]
-
-	xor	ecx, ecx
-	xor	esi, esi
-	jmp	ebp
-
-;eax = --
-;edx = --
-;ecx = 0
-;esi = 0
-;
-;ebx = qlp_coeff[]
-;edi = data[]
-;ebp = @address
-
-	mov	eax, [ebx + 124]			; eax =  qlp_coeff[31]
-	imul	dword [edi - 128]		; edx:eax =  qlp_coeff[31] * data[i-32]
-	add	ecx, eax
-	adc	esi, edx					; sum += qlp_coeff[31] * data[i-32]
-
-	mov	eax, [ebx + 120]			; eax =  qlp_coeff[30]
-	imul	dword [edi - 124]		; edx:eax =  qlp_coeff[30] * data[i-31]
-	add	ecx, eax
-	adc	esi, edx					; sum += qlp_coeff[30] * data[i-31]
-
-	mov	eax, [ebx + 116]
-	imul	dword [edi - 120]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 112]
-	imul	dword [edi - 116]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 108]
-	imul	dword [edi - 112]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 104]
-	imul	dword [edi - 108]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 100]
-	imul	dword [edi - 104]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 96]
-	imul	dword [edi - 100]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 92]
-	imul	dword [edi - 96]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 88]
-	imul	dword [edi - 92]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 84]
-	imul	dword [edi - 88]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 80]
-	imul	dword [edi - 84]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 76]
-	imul	dword [edi - 80]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 72]
-	imul	dword [edi - 76]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 68]
-	imul	dword [edi - 72]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 64]
-	imul	dword [edi - 68]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 60]
-	imul	dword [edi - 64]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 56]
-	imul	dword [edi - 60]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 52]
-	imul	dword [edi - 56]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 48]
-	imul	dword [edi - 52]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 44]
-	imul	dword [edi - 48]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 40]
-	imul	dword [edi - 44]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 36]
-	imul	dword [edi - 40]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 32]
-	imul	dword [edi - 36]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 28]
-	imul	dword [edi - 32]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 24]
-	imul	dword [edi - 28]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 20]
-	imul	dword [edi - 24]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 16]
-	imul	dword [edi - 20]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 12]
-	imul	dword [edi - 16]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 8]
-	imul	dword [edi - 12]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 4]
-	imul	dword [edi - 8]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx]					; eax =  qlp_coeff[ 0] (NOTE: one byte missing from instruction)
-	imul	dword [edi - 4]			; edx:eax =  qlp_coeff[ 0] * data[i- 1]
-	add	ecx, eax
-	adc	esi, edx					; sum += qlp_coeff[ 0] * data[i- 1]
-
-.jumper_0:
-	mov	edx, ecx
-;esi:edx = sum
-	mov	ecx, [esp + 36]			; cl = lp_quantization
-	shrd	edx, esi, cl		; edx = (sum >> lp_quantization)
-;eax = --
-;ecx = --
-;edx = sum >> lp_q
-;esi = --
-	neg	edx						; edx = -(sum >> lp_quantization)
-	mov	eax, [esp + 40]			; residual[] - data[]
-	add	edx, [edi]				; edx = data[i] - (sum >> lp_quantization)
-	mov	[edi + eax], edx			; residual[i] = edx
-	add	edi, 4
-
-	dec	dword [esp + 24]			; the data_len stack slot is the loop counter (ebx holds qlp_coeff[])
-	jz	short .end
-	xor	ecx, ecx
-	xor	esi, esi
-	jmp	ebp						; re-enter the unrolled chain at the same entry point
-
-.end:
-	pop	edi
-	pop	esi
-	pop	ebx
-	pop	ebp
-	ret
-
-; **********************************************************************
-;
-; void FLAC__lpc_restore_signal_wide(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[])
-; {
-; 	unsigned i, j;
-; 	FLAC__int64 sum;
-;
-; 	FLAC__ASSERT(order > 0);
-;
-; 	for(i = 0; i < data_len; i++) {
-; 		sum = 0;
-; 		for(j = 0; j < order; j++)
-; 			sum += qlp_coeff[j] * (FLAC__int64)data[i-j-1];
-; 		data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
-; 	}
-; }
-	ALIGN	16
-cident FLAC__lpc_restore_signal_wide_asm_ia32
-	;[esp + 40]	data[]
-	;[esp + 36]	lp_quantization
-	;[esp + 32]	order
-	;[esp + 28]	qlp_coeff[]
-	;[esp + 24]	data_len
-	;[esp + 20]	residual[]
-
-	;ASSERT(order > 0)
-	;ASSERT(order <= 32)
-	;ASSERT(lp_quantization <= 31)
-
-	push	ebp
-	push	ebx
-	push	esi
-	push	edi
-
-	mov	ebx, [esp + 24]			; ebx = data_len
-	test	ebx, ebx
-	jz	near .end				; do nothing if data_len == 0
-
-.begin:
-	mov	eax, [esp + 32]			; eax = order
-	cmp	eax, 1
-	jg	short .x87_32
-
-	mov	esi, [esp + 20]			; esi = residual[]
-	mov	edi, [esp + 40]			; edi = data[]
-	mov	ecx, [esp + 28]			; ecx = qlp_coeff[]
-	mov	ebp, [ecx]				; ebp = qlp_coeff[0]
-	mov	eax, [edi - 4]			; eax = data[-1]
-	mov	ecx, [esp + 36]			; cl = lp_quantization
-	ALIGN	16
-.x87_1_loop_i:
-	imul	ebp					; edx:eax = qlp_coeff[0] * (FLAC__int64)data[i-1]
-	shrd	eax, edx, cl		; 0 <= lp_quantization <= 15
-;
-	add	eax, [esi]
-	mov	[edi], eax
-;
-	add	esi, 4
-	add	edi, 4
-	dec	ebx
-	jnz	.x87_1_loop_i
-	jmp	.end
-
-.mov_eip_to_eax:
-	mov	eax, [esp]
-	ret
-
-.x87_32:	; eax = order
-	neg	eax
-	add	eax, eax
-	lea	ebp, [eax + eax * 4 + .jumper_0 - .get_eip0]
-	call	.mov_eip_to_eax
-.get_eip0:
-	add	ebp, eax
-	inc	ebp				; compensate for the shorter opcode on the last iteration
-
-	mov	ebx, [esp + 28]			; ebx = qlp_coeff[]
-	mov	edi, [esp + 40]			; esi = data[]
-	sub	[esp + 20], edi			; residual[] -= data[]
-
-	xor	ecx, ecx
-	xor	esi, esi
-	jmp	ebp
-
-;eax = --
-;edx = --
-;ecx = 0
-;esi = 0
-;
-;ebx = qlp_coeff[]
-;edi = data[]
-;ebp = @address
-
-	mov	eax, [ebx + 124]			; eax =  qlp_coeff[31]
-	imul	dword [edi - 128]		; edx:eax =  qlp_coeff[31] * data[i-32]
-	add	ecx, eax
-	adc	esi, edx					; sum += qlp_coeff[31] * data[i-32]
-
-	mov	eax, [ebx + 120]			; eax =  qlp_coeff[30]
-	imul	dword [edi - 124]		; edx:eax =  qlp_coeff[30] * data[i-31]
-	add	ecx, eax
-	adc	esi, edx					; sum += qlp_coeff[30] * data[i-31]
-
-	mov	eax, [ebx + 116]
-	imul	dword [edi - 120]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 112]
-	imul	dword [edi - 116]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 108]
-	imul	dword [edi - 112]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 104]
-	imul	dword [edi - 108]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 100]
-	imul	dword [edi - 104]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 96]
-	imul	dword [edi - 100]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 92]
-	imul	dword [edi - 96]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 88]
-	imul	dword [edi - 92]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 84]
-	imul	dword [edi - 88]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 80]
-	imul	dword [edi - 84]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 76]
-	imul	dword [edi - 80]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 72]
-	imul	dword [edi - 76]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 68]
-	imul	dword [edi - 72]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 64]
-	imul	dword [edi - 68]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 60]
-	imul	dword [edi - 64]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 56]
-	imul	dword [edi - 60]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 52]
-	imul	dword [edi - 56]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 48]
-	imul	dword [edi - 52]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 44]
-	imul	dword [edi - 48]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 40]
-	imul	dword [edi - 44]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 36]
-	imul	dword [edi - 40]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 32]
-	imul	dword [edi - 36]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 28]
-	imul	dword [edi - 32]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 24]
-	imul	dword [edi - 28]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 20]
-	imul	dword [edi - 24]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 16]
-	imul	dword [edi - 20]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 12]
-	imul	dword [edi - 16]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 8]
-	imul	dword [edi - 12]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx + 4]
-	imul	dword [edi - 8]
-	add	ecx, eax
-	adc	esi, edx
-
-	mov	eax, [ebx]					; eax =  qlp_coeff[ 0] (NOTE: one byte missing from instruction)
-	imul	dword [edi - 4]			; edx:eax =  qlp_coeff[ 0] * data[i- 1]
-	add	ecx, eax
-	adc	esi, edx					; sum += qlp_coeff[ 0] * data[i- 1]
-
-.jumper_0:
-	mov	edx, ecx
-;esi:edx = sum
-	mov	ecx, [esp + 36]			; cl = lp_quantization
-	shrd	edx, esi, cl		; edx = (sum >> lp_quantization)
-;eax = --
-;ecx = --
-;edx = sum >> lp_q
-;esi = --
-;
-	mov	eax, [esp + 20]			; residual[] - data[]
-	add	edx, [edi + eax]		; edx = residual[i] + (sum >> lp_quantization)
-	mov	[edi], edx				; data[i] = residual[i] + (sum >> lp_quantization)
-	add	edi, 4
-
-	dec	dword [esp + 24]
-	jz	short .end
-	xor	ecx, ecx
-	xor	esi, esi
-	jmp	ebp
-
-.end:
-	pop	edi
-	pop	esi
-	pop	ebx
-	pop	ebp
-	ret
-
-; end

diff --git a/src/libFLAC/ia32/nasm.h b/src/libFLAC/ia32/nasm.h
deleted file mode 100644
index efa6808..0000000
--- a/src/libFLAC/ia32/nasm.h
+++ /dev/null

@@ -1,85 +0,0 @@
-;  libFLAC - Free Lossless Audio Codec library
-;  Copyright (C) 2001-2009  Josh Coalson
-;  Copyright (C) 2011-2014  Xiph.Org Foundation
-;
-;  Redistribution and use in source and binary forms, with or without
-;  modification, are permitted provided that the following conditions
-;  are met:
-;
-;  - Redistributions of source code must retain the above copyright
-;  notice, this list of conditions and the following disclaimer.
-;
-;  - Redistributions in binary form must reproduce the above copyright
-;  notice, this list of conditions and the following disclaimer in the
-;  documentation and/or other materials provided with the distribution.
-;
-;  - Neither the name of the Xiph.org Foundation nor the names of its
-;  contributors may be used to endorse or promote products derived from
-;  this software without specific prior written permission.
-;
-;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-;  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-;  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
-;  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-;  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-;  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-;  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-;  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-;  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-;  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-	bits 32
-
-%ifdef OBJ_FORMAT_win32
-	%define FLAC__PUBLIC_NEEDS_UNDERSCORE
-	%idefine code_section section .text align=16 class=CODE use32
-	%idefine data_section section .data align=32 class=DATA use32
-	%idefine bss_section  section .bss  align=32 class=DATA use32
-%elifdef OBJ_FORMAT_aout
-	%define FLAC__PUBLIC_NEEDS_UNDERSCORE
-	%idefine code_section section .text
-	%idefine data_section section .data
-	%idefine bss_section  section .bss
-%elifdef OBJ_FORMAT_aoutb
-	%define FLAC__PUBLIC_NEEDS_UNDERSCORE
-	%idefine code_section section .text
-	%idefine data_section section .data
-	%idefine bss_section  section .bss
-%elifdef OBJ_FORMAT_elf
-	%idefine code_section section .text align=16
-	%idefine data_section section .data align=32
-	%idefine bss_section  section .bss  align=32
-%else
-	%error unsupported object format!
-%endif
-
-%imacro cglobal 1
-	%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
-		global _%1
-	%else
-		%if __NASM_MAJOR__ >= 2
-			global %1:function hidden
-		%else
-			global %1
-		%endif
-	%endif
-%endmacro
-
-%imacro cextern 1
-	%ifdef FLAC__PUBLIC_NEEDS_UNDERSCORE
-		extern _%1
-	%else
-		extern %1
-	%endif
-%endmacro
-
-%imacro cident 1
-_%1:
-%1:
-%endmacro
-
-%ifdef OBJ_FORMAT_elf
-section .note.GNU-stack progbits noalloc noexec nowrite align=1
-%endif
-

diff --git a/src/libFLAC/include/private/all.h b/src/libFLAC/include/private/all.h
index a4463d2..c64f9ac 100644
--- a/src/libFLAC/include/private/all.h
+++ b/src/libFLAC/include/private/all.h

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions

diff --git a/src/libFLAC/include/private/bitmath.h b/src/libFLAC/include/private/bitmath.h
index 22d894f..cfff57d 100644
--- a/src/libFLAC/include/private/bitmath.h
+++ b/src/libFLAC/include/private/bitmath.h

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2001-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -36,69 +36,93 @@
 #include "FLAC/ordinals.h"
 #include "FLAC/assert.h"
 
-/* for CHAR_BIT */
-#include <limits.h>
 #include "share/compat.h"
 
-#if defined(_MSC_VER) && (_MSC_VER >= 1400)
+#if defined(_MSC_VER)
 #include <intrin.h> /* for _BitScanReverse* */
 #endif
 
 /* Will never be emitted for MSVC, GCC, Intel compilers */
-static inline unsigned int FLAC__clz_soft_uint32(unsigned int word)
-{
-    static const unsigned char byte_to_unary_table[] = {
-    8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
-    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-    };
+static inline uint32_t FLAC__clz_soft_uint32(FLAC__uint32 word) {
+  static const uint8_t byte_to_unary_table[] = { /* [b] = number of leading zero bits in byte b (8 for b == 0) */
+      8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3,
+      3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+      2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+      1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+      0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  };
 
-    return (word) > 0xffffff ? byte_to_unary_table[(word) >> 24] :
-    (word) > 0xffff ? byte_to_unary_table[(word) >> 16] + 8 :
-    (word) > 0xff ? byte_to_unary_table[(word) >> 8] + 16 :
-    byte_to_unary_table[(word)] + 24;
+  return word > 0xffffff ? byte_to_unary_table[word >> 24] /* highest byte is non-zero */
+         : word > 0xffff ? byte_to_unary_table[word >> 16] + 8 /* skip one all-zero byte */
+         : word > 0xff   ? byte_to_unary_table[word >> 8] + 16 /* skip two all-zero bytes */
+                         : byte_to_unary_table[word] + 24; /* only the low byte is left; yields 32 for word == 0 */
 }
 
-static inline unsigned int FLAC__clz_uint32(FLAC__uint32 v)
-{
-/* Never used with input 0 */
-    FLAC__ASSERT(v > 0);
+static inline uint32_t FLAC__clz_uint32(FLAC__uint32 v) {
+  /* Counts the leading zero bits of v. Never used with input 0 */
+  FLAC__ASSERT(v > 0);
 #if defined(__INTEL_COMPILER)
-    return _bit_scan_reverse(v) ^ 31U;
+  return _bit_scan_reverse(v) ^ 31U;
 #elif defined(__GNUC__) && (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
 /* This will translate either to (bsr ^ 31U), clz , ctlz, cntlz, lzcnt depending on
  * -march= setting or to a software routine in exotic machines. */
-    return __builtin_clz(v);
-#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
-    {
-        unsigned long idx;
-        _BitScanReverse(&idx, v);
-        return idx ^ 31U;
-    }
+  return __builtin_clz(v);
+#elif defined(_MSC_VER)
+  {
+    unsigned long idx; /* _BitScanReverse takes unsigned long*, not uint32_t* */
+    _BitScanReverse(&idx, v);
+    return (uint32_t)idx ^ 31U; /* index of the top set bit -> leading zero count */
+  }
 #else
-    return FLAC__clz_soft_uint32(v);
+  return FLAC__clz_soft_uint32(v);
 #endif
 }
 
-/* This one works with input 0 */
-static inline unsigned int FLAC__clz2_uint32(FLAC__uint32 v)
+/* Used when 64-bit bsr/clz is unavailable: counts the leading zeros of the high 32-bit
+ * half when it is non-zero, otherwise 32 + those of the low half (so word must be non-zero) */
+static inline uint32_t FLAC__clz_soft_uint64(FLAC__uint64 word) {
+  return (FLAC__uint32)(word >> 32)
+             ? FLAC__clz_uint32((FLAC__uint32)(word >> 32))
+             : FLAC__clz_uint32((FLAC__uint32)word) + 32;
+}
+
+static inline uint32_t FLAC__clz_uint64(FLAC__uint64 v) {
+  /* Counts the leading zero bits of v. Never used with input 0 */
+  FLAC__ASSERT(v > 0);
+#if defined(__GNUC__) && \
+    (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+  return __builtin_clzll(v);
+#elif (defined(__INTEL_COMPILER) || defined(_MSC_VER)) && \
+    (defined(_M_IA64) || defined(_M_X64))
 {
-    if (!v)
-        return 32;
-    return FLAC__clz_uint32(v);
+  unsigned long idx; /* _BitScanReverse64 takes unsigned long*, not uint32_t* */
+  _BitScanReverse64(&idx, v);
+  return (uint32_t)idx ^ 63U; /* index of the top set bit -> leading zero count */
+}
+#else
+  return FLAC__clz_soft_uint64(v);
+#endif
+}
+
+/* These two functions also work with input 0, for which they return the full bit width (32 / 64) */
+static inline uint32_t FLAC__clz2_uint32(FLAC__uint32 v) {
+  if (!v) {
+    return 32;
+  }
+  return FLAC__clz_uint32(v);
+}
+
+static inline uint32_t FLAC__clz2_uint64(FLAC__uint64 v) {
+  if (!v) {
+    return 64;
+  }
+  return FLAC__clz_uint64(v);
 }
 
 /* An example of what FLAC__bitmath_ilog2() computes:
@@ -124,63 +148,57 @@
  * ilog2(18) = 4
  */
 
-static inline unsigned FLAC__bitmath_ilog2(FLAC__uint32 v)
-{
-    FLAC__ASSERT(v > 0);
+static inline uint32_t FLAC__bitmath_ilog2(FLAC__uint32 v) {
+  FLAC__ASSERT(v > 0); /* returns the index of the highest set bit; v must be non-zero */
 #if defined(__INTEL_COMPILER)
-    return _bit_scan_reverse(v);
-#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
-    {
-        unsigned long idx;
-        _BitScanReverse(&idx, v);
-        return idx;
-    }
-#else
-    return sizeof(FLAC__uint32) * CHAR_BIT  - 1 - FLAC__clz_uint32(v);
-#endif
-}
-
-
-#ifdef FLAC__INTEGER_ONLY_LIBRARY /* Unused otherwise */
-
-static inline unsigned FLAC__bitmath_ilog2_wide(FLAC__uint64 v)
+  return _bit_scan_reverse(v);
+#elif defined(_MSC_VER)
 {
-    FLAC__ASSERT(v > 0);
-#if defined(__GNUC__) && (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
-    return sizeof(FLAC__uint64) * CHAR_BIT - 1 - __builtin_clzll(v);
-/* Sorry, only supported in x64/Itanium.. and both have fast FPU which makes integer-only encoder pointless */
-#elif (defined(_MSC_VER) && (_MSC_VER >= 1400)) && (defined(_M_IA64) || defined(_M_X64))
-    {
-        unsigned long idx;
-        _BitScanReverse64(&idx, v);
-        return idx;
-    }
+  unsigned long idx; /* _BitScanReverse takes unsigned long*, not uint32_t* */
+  _BitScanReverse(&idx, v);
+  return (uint32_t)idx;
+}
 #else
-/*  Brain-damaged compilers will use the fastest possible way that is,
-    de Bruijn sequences (http://supertech.csail.mit.edu/papers/debruijn.pdf)
-    (C) Timothy B. Terriberry (tterribe@xiph.org) 2001-2009 CC0 (Public domain).
-*/
-    {
-        static const unsigned char DEBRUIJN_IDX64[64]={
-            0, 1, 2, 7, 3,13, 8,19, 4,25,14,28, 9,34,20,40,
-            5,17,26,38,15,46,29,48,10,31,35,54,21,50,41,57,
-            63, 6,12,18,24,27,33,39,16,37,45,47,30,53,49,56,
-            62,11,23,32,36,44,52,55,61,22,43,51,60,42,59,58
-        };
-        v|= v>>1;
-        v|= v>>2;
-        v|= v>>4;
-        v|= v>>8;
-        v|= v>>16;
-        v|= v>>32;
-        v= (v>>1)+1;
-        return DEBRUIJN_IDX64[v*0x218A392CD3D5DBF>>58&0x3F];
-    }
+  return FLAC__clz_uint32(v) ^ 31U; /* x ^ 31 == 31 - x for a leading-zero count x in [0, 31] */
 #endif
 }
-#endif
 
-unsigned FLAC__bitmath_silog2(int v);
-unsigned FLAC__bitmath_silog2_wide(FLAC__int64 v);
+static inline uint32_t FLAC__bitmath_ilog2_wide(FLAC__uint64 v) {
+  FLAC__ASSERT(v > 0); /* returns the index of the highest set bit; v must be non-zero */
+#if defined(__GNUC__) && (__GNUC__ >= 4 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
+  return __builtin_clzll(v) ^ 63U;
+/* Sorry, only supported in x64/Itanium.. and both have fast FPU which makes integer-only encoder pointless */
+#elif (defined(__INTEL_COMPILER) || defined(_MSC_VER)) && \
+    (defined(_M_IA64) || defined(_M_X64))
+{
+  unsigned long idx; /* _BitScanReverse64 takes unsigned long*, not uint32_t* */
+  _BitScanReverse64(&idx, v);
+  return (uint32_t)idx;
+}
+#else
+  /*  Compilers without a 64-bit bsr/clz intrinsic use the fastest portable
+          way, that is, de Bruijn sequences
+     (http://supertech.csail.mit.edu/papers/debruijn.pdf) (C) Timothy B.
+     Terriberry (tterribe@xiph.org) 2001-2009 CC0 (Public domain).
+  */
+  {
+    static const uint8_t DEBRUIJN_IDX64[64] = {
+        0,  1,  2,  7,  3,  13, 8,  19, 4,  25, 14, 28, 9,  34, 20, 40,
+        5,  17, 26, 38, 15, 46, 29, 48, 10, 31, 35, 54, 21, 50, 41, 57,
+        63, 6,  12, 18, 24, 27, 33, 39, 16, 37, 45, 47, 30, 53, 49, 56,
+        62, 11, 23, 32, 36, 44, 52, 55, 61, 22, 43, 51, 60, 42, 59, 58};
+    v |= v >> 1; /* smear the highest set bit into every lower position */
+    v |= v >> 2;
+    v |= v >> 4;
+    v |= v >> 8;
+    v |= v >> 16;
+    v |= v >> 32;
+    v = (v >> 1) + 1; /* keep only the highest set bit */
+    return DEBRUIJN_IDX64[v * FLAC__U64L(0x218A392CD3D5DBF) >> 58 & 0x3F];
+  }
+#endif
+}
+
+uint32_t FLAC__bitmath_silog2(FLAC__int64 v);
 
 #endif

diff --git a/src/libFLAC/include/private/bitreader.h b/src/libFLAC/include/private/bitreader.h
index 4dea8d0..aa70d35 100644
--- a/src/libFLAC/include/private/bitreader.h
+++ b/src/libFLAC/include/private/bitreader.h

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -53,6 +53,9 @@
 FLAC__bool FLAC__bitreader_init(FLAC__BitReader *br, FLAC__BitReaderReadCallback rcb, void *cd);
 void FLAC__bitreader_free(FLAC__BitReader *br); /* does not 'free(br)' */
 FLAC__bool FLAC__bitreader_clear(FLAC__BitReader *br);
+void FLAC__bitreader_set_framesync_location(FLAC__BitReader* br);
+FLAC__bool FLAC__bitreader_rewind_to_after_last_seen_framesync(
+    FLAC__BitReader* br);
 void FLAC__bitreader_dump(const FLAC__BitReader *br, FILE *out);
 
 /*
@@ -65,27 +68,59 @@
  * info functions
  */
 FLAC__bool FLAC__bitreader_is_consumed_byte_aligned(const FLAC__BitReader *br);
-unsigned FLAC__bitreader_bits_left_for_byte_alignment(const FLAC__BitReader *br);
-unsigned FLAC__bitreader_get_input_bits_unconsumed(const FLAC__BitReader *br);
+uint32_t FLAC__bitreader_bits_left_for_byte_alignment(
+    const FLAC__BitReader* br);
+uint32_t FLAC__bitreader_get_input_bits_unconsumed(const FLAC__BitReader* br);
+void FLAC__bitreader_set_limit(FLAC__BitReader* br, uint32_t limit);
+void FLAC__bitreader_remove_limit(FLAC__BitReader* br);
+uint32_t FLAC__bitreader_limit_remaining(FLAC__BitReader* br);
+void FLAC__bitreader_limit_invalidate(FLAC__BitReader* br);
 
 /*
  * read functions
  */
 
-FLAC__bool FLAC__bitreader_read_raw_uint32(FLAC__BitReader *br, FLAC__uint32 *val, unsigned bits);
-FLAC__bool FLAC__bitreader_read_raw_int32(FLAC__BitReader *br, FLAC__int32 *val, unsigned bits);
-FLAC__bool FLAC__bitreader_read_raw_uint64(FLAC__BitReader *br, FLAC__uint64 *val, unsigned bits);
+FLAC__bool FLAC__bitreader_read_raw_uint32(FLAC__BitReader* br,
+                                           FLAC__uint32* val,
+                                           uint32_t bits);
+FLAC__bool FLAC__bitreader_read_raw_int32(FLAC__BitReader* br,
+                                          FLAC__int32* val,
+                                          uint32_t bits);
+FLAC__bool FLAC__bitreader_read_raw_uint64(FLAC__BitReader* br,
+                                           FLAC__uint64* val,
+                                           uint32_t bits);
+FLAC__bool FLAC__bitreader_read_raw_int64(FLAC__BitReader* br,
+                                          FLAC__int64* val,
+                                          uint32_t bits);
 FLAC__bool FLAC__bitreader_read_uint32_little_endian(FLAC__BitReader *br, FLAC__uint32 *val); /*only for bits=32*/
-FLAC__bool FLAC__bitreader_skip_bits_no_crc(FLAC__BitReader *br, unsigned bits); /* WATCHOUT: does not CRC the skipped data! */ /*@@@@ add to unit tests */
-FLAC__bool FLAC__bitreader_skip_byte_block_aligned_no_crc(FLAC__BitReader *br, unsigned nvals); /* WATCHOUT: does not CRC the read data! */
-FLAC__bool FLAC__bitreader_read_byte_block_aligned_no_crc(FLAC__BitReader *br, FLAC__byte *val, unsigned nvals); /* WATCHOUT: does not CRC the read data! */
-FLAC__bool FLAC__bitreader_read_unary_unsigned(FLAC__BitReader *br, unsigned *val);
-FLAC__bool FLAC__bitreader_read_rice_signed(FLAC__BitReader *br, int *val, unsigned parameter);
-FLAC__bool FLAC__bitreader_read_rice_signed_block(FLAC__BitReader *br, int vals[], unsigned nvals, unsigned parameter);
+FLAC__bool FLAC__bitreader_skip_bits_no_crc(FLAC__BitReader* br, uint32_t bits);
+/* WATCHOUT: does not CRC the skipped data! */ /*@@@@ add to unit tests */
+FLAC__bool FLAC__bitreader_skip_byte_block_aligned_no_crc(
+    FLAC__BitReader* br,
+    uint32_t nvals); /* WATCHOUT: does not CRC the read data! */
+FLAC__bool FLAC__bitreader_read_byte_block_aligned_no_crc(
+    FLAC__BitReader* br,
+    FLAC__byte* val,
+    uint32_t nvals); /* WATCHOUT: does not CRC the read data! */
+FLAC__bool FLAC__bitreader_read_unary_unsigned(FLAC__BitReader* br,
+                                               uint32_t* val);
+FLAC__bool FLAC__bitreader_read_rice_signed(FLAC__BitReader* br,
+                                            int* val,
+                                            uint32_t parameter);
+FLAC__bool FLAC__bitreader_read_rice_signed_block(FLAC__BitReader* br,
+                                                  int vals[],
+                                                  uint32_t nvals,
+                                                  uint32_t parameter);
 #if 0 /* UNUSED */
-FLAC__bool FLAC__bitreader_read_golomb_signed(FLAC__BitReader *br, int *val, unsigned parameter);
-FLAC__bool FLAC__bitreader_read_golomb_unsigned(FLAC__BitReader *br, unsigned *val, unsigned parameter);
+FLAC__bool FLAC__bitreader_read_golomb_signed(FLAC__BitReader *br, int *val, uint32_t parameter);
+FLAC__bool FLAC__bitreader_read_golomb_unsigned(FLAC__BitReader *br, uint32_t *val, uint32_t parameter);
 #endif
-FLAC__bool FLAC__bitreader_read_utf8_uint32(FLAC__BitReader *br, FLAC__uint32 *val, FLAC__byte *raw, unsigned *rawlen);
-FLAC__bool FLAC__bitreader_read_utf8_uint64(FLAC__BitReader *br, FLAC__uint64 *val, FLAC__byte *raw, unsigned *rawlen);
+FLAC__bool FLAC__bitreader_read_utf8_uint32(FLAC__BitReader* br,
+                                            FLAC__uint32* val,
+                                            FLAC__byte* raw,
+                                            uint32_t* rawlen);
+FLAC__bool FLAC__bitreader_read_utf8_uint64(FLAC__BitReader* br,
+                                            FLAC__uint64* val,
+                                            FLAC__byte* raw,
+                                            uint32_t* rawlen);
 #endif

diff --git a/src/libFLAC/include/private/bitwriter.h b/src/libFLAC/include/private/bitwriter.h
index 3b1362d..29433d2 100644
--- a/src/libFLAC/include/private/bitwriter.h
+++ b/src/libFLAC/include/private/bitwriter.h

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -64,7 +64,9 @@
  * info functions
  */
 FLAC__bool FLAC__bitwriter_is_byte_aligned(const FLAC__BitWriter *bw);
-unsigned FLAC__bitwriter_get_input_bits_unconsumed(const FLAC__BitWriter *bw); /* can be called anytime, returns total # of bits unconsumed */
+uint32_t FLAC__bitwriter_get_input_bits_unconsumed(
+    const FLAC__BitWriter*
+        bw); /* can be called anytime, returns total # of bits unconsumed */
 
 /*
  * direct buffer access
@@ -79,23 +81,40 @@
 /*
  * write functions
  */
-FLAC__bool FLAC__bitwriter_write_zeroes(FLAC__BitWriter *bw, unsigned bits);
-FLAC__bool FLAC__bitwriter_write_raw_uint32(FLAC__BitWriter *bw, FLAC__uint32 val, unsigned bits);
-FLAC__bool FLAC__bitwriter_write_raw_int32(FLAC__BitWriter *bw, FLAC__int32 val, unsigned bits);
-FLAC__bool FLAC__bitwriter_write_raw_uint64(FLAC__BitWriter *bw, FLAC__uint64 val, unsigned bits);
+FLAC__bool FLAC__bitwriter_write_zeroes(FLAC__BitWriter* bw, uint32_t bits);
+FLAC__bool FLAC__bitwriter_write_raw_uint32(FLAC__BitWriter* bw,
+                                            FLAC__uint32 val,
+                                            uint32_t bits);
+FLAC__bool FLAC__bitwriter_write_raw_int32(FLAC__BitWriter* bw,
+                                           FLAC__int32 val,
+                                           uint32_t bits);
+FLAC__bool FLAC__bitwriter_write_raw_uint64(FLAC__BitWriter* bw,
+                                            FLAC__uint64 val,
+                                            uint32_t bits);
+FLAC__bool FLAC__bitwriter_write_raw_int64(FLAC__BitWriter* bw,
+                                           FLAC__int64 val,
+                                           uint32_t bits);
 FLAC__bool FLAC__bitwriter_write_raw_uint32_little_endian(FLAC__BitWriter *bw, FLAC__uint32 val); /*only for bits=32*/
-FLAC__bool FLAC__bitwriter_write_byte_block(FLAC__BitWriter *bw, const FLAC__byte vals[], unsigned nvals);
-FLAC__bool FLAC__bitwriter_write_unary_unsigned(FLAC__BitWriter *bw, unsigned val);
-unsigned FLAC__bitwriter_rice_bits(FLAC__int32 val, unsigned parameter);
+FLAC__bool FLAC__bitwriter_write_byte_block(FLAC__BitWriter* bw,
+                                            const FLAC__byte vals[],
+                                            uint32_t nvals);
+FLAC__bool FLAC__bitwriter_write_unary_unsigned(FLAC__BitWriter* bw,
+                                                uint32_t val);
+uint32_t FLAC__bitwriter_rice_bits(FLAC__int32 val, uint32_t parameter);
 #if 0 /* UNUSED */
-unsigned FLAC__bitwriter_golomb_bits_signed(int val, unsigned parameter);
-unsigned FLAC__bitwriter_golomb_bits_unsigned(unsigned val, unsigned parameter);
+uint32_t FLAC__bitwriter_golomb_bits_signed(int val, uint32_t parameter);
+uint32_t FLAC__bitwriter_golomb_bits_unsigned(uint32_t val, uint32_t parameter);
 #endif
-FLAC__bool FLAC__bitwriter_write_rice_signed(FLAC__BitWriter *bw, FLAC__int32 val, unsigned parameter);
-FLAC__bool FLAC__bitwriter_write_rice_signed_block(FLAC__BitWriter *bw, const FLAC__int32 *vals, unsigned nvals, unsigned parameter);
+FLAC__bool FLAC__bitwriter_write_rice_signed(FLAC__BitWriter* bw,
+                                             FLAC__int32 val,
+                                             uint32_t parameter);
+FLAC__bool FLAC__bitwriter_write_rice_signed_block(FLAC__BitWriter* bw,
+                                                   const FLAC__int32* vals,
+                                                   uint32_t nvals,
+                                                   uint32_t parameter);
 #if 0 /* UNUSED */
-FLAC__bool FLAC__bitwriter_write_golomb_signed(FLAC__BitWriter *bw, int val, unsigned parameter);
-FLAC__bool FLAC__bitwriter_write_golomb_unsigned(FLAC__BitWriter *bw, unsigned val, unsigned parameter);
+FLAC__bool FLAC__bitwriter_write_golomb_signed(FLAC__BitWriter *bw, int val, uint32_t parameter);
+FLAC__bool FLAC__bitwriter_write_golomb_unsigned(FLAC__BitWriter *bw, uint32_t val, uint32_t parameter);
 #endif
 FLAC__bool FLAC__bitwriter_write_utf8_uint32(FLAC__BitWriter *bw, FLAC__uint32 val);
 FLAC__bool FLAC__bitwriter_write_utf8_uint64(FLAC__BitWriter *bw, FLAC__uint64 val);

diff --git a/src/libFLAC/include/private/cpu.h b/src/libFLAC/include/private/cpu.h
index 8927897..081692f 100644
--- a/src/libFLAC/include/private/cpu.h
+++ b/src/libFLAC/include/private/cpu.h

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2001-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -39,7 +39,29 @@
 #include <config.h>
 #endif
 
-#if defined FLAC__HAS_X86INTRIN
+#ifndef FLAC__CPU_X86_64
+
+#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__) || \
+    defined(__x86_64) || defined(_M_X64) || defined(_M_AMD64)
+#define FLAC__CPU_X86_64
+#endif
+
+#endif
+
+#ifndef FLAC__CPU_IA32
+
+#if defined(__i386__) || defined(__i486__) || defined(__i586__) || \
+    defined(__i686__) || defined(__i386) || defined(_M_IX86)
+#define FLAC__CPU_IA32
+#endif
+
+#endif
+
+#ifndef __has_attribute
+#define __has_attribute(x) 0
+#endif
+
+#if FLAC__HAS_X86INTRIN
 /* SSE intrinsics support by ICC/MSVC/GCC */
 #if defined __INTEL_COMPILER
   #define FLAC__SSE_TARGET(x)
@@ -49,13 +71,38 @@
     #define FLAC__SSSE3_SUPPORTED 1
     #define FLAC__SSE4_1_SUPPORTED 1
   #endif
-  #if (__INTEL_COMPILER >= 1110) /* Intel C++ Compiler 11.1 */
-    #define FLAC__AVX_SUPPORTED 1
-  #endif
-  #if (__INTEL_COMPILER >= 1300) /* Intel C++ Compiler 13.0 */
-    #define FLAC__AVX2_SUPPORTED 1
-    #define FLAC__FMA_SUPPORTED 1
-  #endif
+#ifdef FLAC__USE_AVX
+#if (__INTEL_COMPILER >= 1110) /* Intel C++ Compiler 11.1 */
+#define FLAC__AVX_SUPPORTED 1
+#endif
+#if (__INTEL_COMPILER >= 1300) /* Intel C++ Compiler 13.0 */
+#define FLAC__AVX2_SUPPORTED 1
+#define FLAC__FMA_SUPPORTED 1
+#endif
+#endif
+#elif defined __clang__ && __has_attribute(__target__) /* clang */
+#define FLAC__SSE_TARGET(x) __attribute__((__target__(x)))
+#define FLAC__SSE_SUPPORTED 1
+#define FLAC__SSE2_SUPPORTED 1
+#define FLAC__SSSE3_SUPPORTED 1
+#define FLAC__SSE4_1_SUPPORTED 1
+#ifdef FLAC__USE_AVX
+#define FLAC__AVX_SUPPORTED 1
+#define FLAC__AVX2_SUPPORTED 1
+#define FLAC__FMA_SUPPORTED 1
+#endif
+#elif defined __GNUC__ && !defined __clang__ && \
+    (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9)) /* GCC 4.9+ */
+#define FLAC__SSE_TARGET(x) __attribute__((__target__(x)))
+#define FLAC__SSE_SUPPORTED 1
+#define FLAC__SSE2_SUPPORTED 1
+#define FLAC__SSSE3_SUPPORTED 1
+#define FLAC__SSE4_1_SUPPORTED 1
+#ifdef FLAC__USE_AVX
+#define FLAC__AVX_SUPPORTED 1
+#define FLAC__AVX2_SUPPORTED 1
+#define FLAC__FMA_SUPPORTED 1
+#endif
 #elif defined _MSC_VER
   #define FLAC__SSE_TARGET(x)
   #define FLAC__SSE_SUPPORTED 1
@@ -64,105 +111,91 @@
     #define FLAC__SSSE3_SUPPORTED 1
     #define FLAC__SSE4_1_SUPPORTED 1
   #endif
-  #if (_MSC_FULL_VER >= 160040219) /* MS Visual Studio 2010 SP1 */
-    #define FLAC__AVX_SUPPORTED 1
-  #endif
-  #if (_MSC_VER >= 1700) /* MS Visual Studio 2012 */
-    #define FLAC__AVX2_SUPPORTED 1
-    #define FLAC__FMA_SUPPORTED 1
-  #endif
-#elif defined __GNUC__
-  #if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 9)) /* since GCC 4.9 -msse.. compiler options aren't necessary */
-    #define FLAC__SSE_TARGET(x) __attribute__ ((__target__ (x)))
-    #define FLAC__SSE_SUPPORTED 1
-    #define FLAC__SSE2_SUPPORTED 1
-    #define FLAC__SSSE3_SUPPORTED 1
-    #define FLAC__SSE4_1_SUPPORTED 1
-    #define FLAC__AVX_SUPPORTED 1
-    #define FLAC__AVX2_SUPPORTED 1
-    #define FLAC__FMA_SUPPORTED 1
-  #else /* for GCC older than 4.9 */
-    #define FLAC__SSE_TARGET(x)
-    #ifdef __SSE__
-      #define FLAC__SSE_SUPPORTED 1
-    #endif
-    #ifdef __SSE2__
-      #define FLAC__SSE2_SUPPORTED 1
-    #endif
-    #ifdef __SSSE3__
-      #define FLAC__SSSE3_SUPPORTED 1
-    #endif
-    #ifdef __SSE4_1__
-      #define FLAC__SSE4_1_SUPPORTED 1
-    #endif
-    #ifdef __AVX__
-      #define FLAC__AVX_SUPPORTED 1
-    #endif
-    #ifdef __AVX2__
-      #define FLAC__AVX2_SUPPORTED 1
-    #endif
-    #ifdef __FMA__
-      #define FLAC__FMA_SUPPORTED 1
-    #endif
-  #endif /* GCC version */
+#ifdef FLAC__USE_AVX
+#if (_MSC_FULL_VER >= 160040219) /* MS Visual Studio 2010 SP1 */
+#define FLAC__AVX_SUPPORTED 1
+#endif
+#if (_MSC_VER >= 1700) /* MS Visual Studio 2012 */
+#define FLAC__AVX2_SUPPORTED 1
+#define FLAC__FMA_SUPPORTED 1
+#endif
+#endif
+#else
+#define FLAC__SSE_TARGET(x)
+#ifdef __SSE__
+#define FLAC__SSE_SUPPORTED 1
+#endif
+#ifdef __SSE2__
+#define FLAC__SSE2_SUPPORTED 1
+#endif
+#ifdef __SSSE3__
+#define FLAC__SSSE3_SUPPORTED 1
+#endif
+#ifdef __SSE4_1__
+#define FLAC__SSE4_1_SUPPORTED 1
+#endif
+#ifdef FLAC__USE_AVX
+#ifdef __AVX__
+#define FLAC__AVX_SUPPORTED 1
+#endif
+#ifdef __AVX2__
+#define FLAC__AVX2_SUPPORTED 1
+#endif
+#ifdef __FMA__
+#define FLAC__FMA_SUPPORTED 1
+#endif
+#endif
 #endif /* compiler version */
 #endif /* intrinsics support */
 
+#ifndef FLAC__AVX_SUPPORTED
+#define FLAC__AVX_SUPPORTED 0
+#endif
+
 typedef enum {
-	FLAC__CPUINFO_TYPE_IA32,
-	FLAC__CPUINFO_TYPE_X86_64,
-	FLAC__CPUINFO_TYPE_UNKNOWN
+  FLAC__CPUINFO_TYPE_IA32,
+  FLAC__CPUINFO_TYPE_X86_64,
+  FLAC__CPUINFO_TYPE_PPC,
+  FLAC__CPUINFO_TYPE_UNKNOWN
 } FLAC__CPUInfo_Type;
 
-#if defined FLAC__CPU_IA32
 typedef struct {
-	FLAC__bool cmov;
-	FLAC__bool mmx;
-	FLAC__bool sse;
-	FLAC__bool sse2;
+  FLAC__bool intel;
 
-	FLAC__bool sse3;
-	FLAC__bool ssse3;
-	FLAC__bool sse41;
-	FLAC__bool sse42;
-	FLAC__bool avx;
-	FLAC__bool avx2;
-	FLAC__bool fma;
-} FLAC__CPUInfo_IA32;
-#elif defined FLAC__CPU_X86_64
-typedef struct {
-	FLAC__bool sse3;
-	FLAC__bool ssse3;
-	FLAC__bool sse41;
-	FLAC__bool sse42;
-	FLAC__bool avx;
-	FLAC__bool avx2;
-	FLAC__bool fma;
+  FLAC__bool cmov;
+  FLAC__bool mmx;
+  FLAC__bool sse;
+  FLAC__bool sse2;
+
+  FLAC__bool sse3;
+  FLAC__bool ssse3;
+  FLAC__bool sse41;
+  FLAC__bool sse42;
+  FLAC__bool avx;
+  FLAC__bool avx2;
+  FLAC__bool fma;
 } FLAC__CPUInfo_x86;
-#endif
+
+typedef struct {
+  FLAC__bool arch_3_00;
+  FLAC__bool arch_2_07;
+} FLAC__CPUInfo_ppc;
 
 typedef struct {
 	FLAC__bool use_asm;
-	FLAC__CPUInfo_Type type;
-#if defined FLAC__CPU_IA32
-	FLAC__CPUInfo_IA32 ia32;
-#elif defined FLAC__CPU_X86_64
-	FLAC__CPUInfo_x86 x86;
-#endif
+        FLAC__CPUInfo_Type type;
+        FLAC__CPUInfo_x86 x86;
+        FLAC__CPUInfo_ppc ppc;
 } FLAC__CPUInfo;
 
 void FLAC__cpu_info(FLAC__CPUInfo *info);
 
-#ifndef FLAC__NO_ASM
-# if defined FLAC__CPU_IA32 && defined FLAC__HAS_NASM
 FLAC__uint32 FLAC__cpu_have_cpuid_asm_ia32(void);
-void         FLAC__cpu_info_asm_ia32(FLAC__uint32 *flags_edx, FLAC__uint32 *flags_ecx);
-# endif
-# if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
-FLAC__uint32 FLAC__cpu_have_cpuid_x86(void);
-void         FLAC__cpu_info_x86(FLAC__uint32 level, FLAC__uint32 *eax, FLAC__uint32 *ebx, FLAC__uint32 *ecx, FLAC__uint32 *edx);
-FLAC__uint32 FLAC__cpu_xgetbv_x86(void);
-# endif
-#endif
+
+void FLAC__cpu_info_asm_ia32(FLAC__uint32 level,
+                             FLAC__uint32* eax,
+                             FLAC__uint32* ebx,
+                             FLAC__uint32* ecx,
+                             FLAC__uint32* edx);
 
 #endif

diff --git a/src/libFLAC/include/private/crc.h b/src/libFLAC/include/private/crc.h
index 29c512c..19470d4 100644
--- a/src/libFLAC/include/private/crc.h
+++ b/src/libFLAC/include/private/crc.h

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -39,24 +39,28 @@
 ** polynomial = x^8 + x^2 + x^1 + x^0
 ** init = 0
 */
-extern FLAC__byte const FLAC__crc8_table[256];
-#define FLAC__CRC8_UPDATE(data, crc) (crc) = FLAC__crc8_table[(crc) ^ (data)];
-void FLAC__crc8_update(const FLAC__byte data, FLAC__uint8 *crc);
-void FLAC__crc8_update_block(const FLAC__byte *data, unsigned len, FLAC__uint8 *crc);
-FLAC__uint8 FLAC__crc8(const FLAC__byte *data, unsigned len);
+FLAC__uint8 FLAC__crc8(const FLAC__byte* data, uint32_t len);
 
 /* 16 bit CRC generator, MSB shifted first
 ** polynomial = x^16 + x^15 + x^2 + x^0
 ** init = 0
 */
-extern unsigned const FLAC__crc16_table[256];
+extern FLAC__uint16 const FLAC__crc16_table[8][256];
 
-#define FLAC__CRC16_UPDATE(data, crc) ((((crc)<<8) & 0xffff) ^ FLAC__crc16_table[((crc)>>8) ^ (data)])
+#define FLAC__CRC16_UPDATE(data, crc) \
+  ((((crc) << 8) & 0xffff) ^ FLAC__crc16_table[0][((crc) >> 8) ^ (data)])
 /* this alternate may be faster on some systems/compilers */
 #if 0
-#define FLAC__CRC16_UPDATE(data, crc) ((((crc)<<8) ^ FLAC__crc16_table[((crc)>>8) ^ (data)]) & 0xffff)
+#define FLAC__CRC16_UPDATE(data, crc) \
+  ((((crc) << 8) ^ FLAC__crc16_table[0][((crc) >> 8) ^ (data)]) & 0xffff)
 #endif
 
-unsigned FLAC__crc16(const FLAC__byte *data, unsigned len);
+FLAC__uint16 FLAC__crc16(const FLAC__byte* data, uint32_t len);
+FLAC__uint16 FLAC__crc16_update_words32(const FLAC__uint32* words,
+                                        uint32_t len,
+                                        FLAC__uint16 crc);
+FLAC__uint16 FLAC__crc16_update_words64(const FLAC__uint64* words,
+                                        uint32_t len,
+                                        FLAC__uint16 crc);
 
 #endif

diff --git a/src/libFLAC/include/private/fixed.h b/src/libFLAC/include/private/fixed.h
index dcc4715..d320fe1 100644
--- a/src/libFLAC/include/private/fixed.h
+++ b/src/libFLAC/include/private/fixed.h

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -54,26 +54,63 @@
  *	OUT residual_bits_per_sample[0,FLAC__MAX_FIXED_ORDER]
  */
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
-unsigned FLAC__fixed_compute_best_predictor(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
-unsigned FLAC__fixed_compute_best_predictor_wide(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
+uint32_t FLAC__fixed_compute_best_predictor(
+    const FLAC__int32 data[],
+    uint32_t data_len,
+    float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]);
+uint32_t FLAC__fixed_compute_best_predictor_wide(
+    const FLAC__int32 data[],
+    uint32_t data_len,
+    float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]);
+uint32_t FLAC__fixed_compute_best_predictor_limit_residual(
+    const FLAC__int32 data[],
+    uint32_t data_len,
+    float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]);
+uint32_t FLAC__fixed_compute_best_predictor_limit_residual_33bit(
+    const FLAC__int64 data[],
+    uint32_t data_len,
+    float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]);
 # ifndef FLAC__NO_ASM
-#  if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
+#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN
 #   ifdef FLAC__SSE2_SUPPORTED
-unsigned FLAC__fixed_compute_best_predictor_intrin_sse2(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]);
-unsigned FLAC__fixed_compute_best_predictor_wide_intrin_sse2(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]);
+uint32_t FLAC__fixed_compute_best_predictor_intrin_sse2(
+    const FLAC__int32 data[],
+    uint32_t data_len,
+    float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]);
+uint32_t FLAC__fixed_compute_best_predictor_wide_intrin_sse2(
+    const FLAC__int32 data[],
+    uint32_t data_len,
+    float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]);
 #   endif
 #   ifdef FLAC__SSSE3_SUPPORTED
-unsigned FLAC__fixed_compute_best_predictor_intrin_ssse3(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
-unsigned FLAC__fixed_compute_best_predictor_wide_intrin_ssse3(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]);
+uint32_t FLAC__fixed_compute_best_predictor_intrin_ssse3(
+    const FLAC__int32 data[],
+    uint32_t data_len,
+    float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]);
+uint32_t FLAC__fixed_compute_best_predictor_wide_intrin_ssse3(
+    const FLAC__int32 data[],
+    uint32_t data_len,
+    float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]);
 #   endif
-#  endif
-#  if defined FLAC__CPU_IA32 && defined FLAC__HAS_NASM
-unsigned FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
-#  endif
+#endif
 # endif
 #else
-unsigned FLAC__fixed_compute_best_predictor(const FLAC__int32 data[], unsigned data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
-unsigned FLAC__fixed_compute_best_predictor_wide(const FLAC__int32 data[], unsigned data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
+uint32_t FLAC__fixed_compute_best_predictor(
+    const FLAC__int32 data[],
+    uint32_t data_len,
+    FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]);
+uint32_t FLAC__fixed_compute_best_predictor_wide(
+    const FLAC__int32 data[],
+    uint32_t data_len,
+    FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]);
+uint32_t FLAC__fixed_compute_best_predictor_limit_residual(
+    const FLAC__int32 data[],
+    uint32_t data_len,
+    FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]);
+uint32_t FLAC__fixed_compute_best_predictor_limit_residual_33bit(
+    const FLAC__int64 data[],
+    uint32_t data_len,
+    FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER + 1]);
 #endif
 
 /*
@@ -87,7 +124,18 @@
  *	IN order <= FLAC__MAX_FIXED_ORDER fixed-predictor order
  *	OUT residual[0,data_len-1]        residual signal
  */
-void FLAC__fixed_compute_residual(const FLAC__int32 data[], unsigned data_len, unsigned order, FLAC__int32 residual[]);
+void FLAC__fixed_compute_residual(const FLAC__int32 data[],
+                                  uint32_t data_len,
+                                  uint32_t order,
+                                  FLAC__int32 residual[]);
+void FLAC__fixed_compute_residual_wide(const FLAC__int32 data[],
+                                       uint32_t data_len,
+                                       uint32_t order,
+                                       FLAC__int32 residual[]);
+void FLAC__fixed_compute_residual_wide_33bit(const FLAC__int64 data[],
+                                             uint32_t data_len,
+                                             uint32_t order,
+                                             FLAC__int32 residual[]);
 
 /*
  *	FLAC__fixed_restore_signal()
@@ -102,6 +150,17 @@
  *	IN  data[-order,-1]               previously-reconstructed historical samples
  *	OUT data[0,data_len-1]            original signal
  */
-void FLAC__fixed_restore_signal(const FLAC__int32 residual[], unsigned data_len, unsigned order, FLAC__int32 data[]);
+void FLAC__fixed_restore_signal(const FLAC__int32 residual[],
+                                uint32_t data_len,
+                                uint32_t order,
+                                FLAC__int32 data[]);
+void FLAC__fixed_restore_signal_wide(const FLAC__int32 residual[],
+                                     uint32_t data_len,
+                                     uint32_t order,
+                                     FLAC__int32 data[]);
+void FLAC__fixed_restore_signal_wide_33bit(const FLAC__int32 residual[],
+                                           uint32_t data_len,
+                                           uint32_t order,
+                                           FLAC__int64 data[]);
 
 #endif

diff --git a/src/libFLAC/include/private/float.h b/src/libFLAC/include/private/float.h
index ab26432..d13fe78 100644
--- a/src/libFLAC/include/private/float.h
+++ b/src/libFLAC/include/private/float.h

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2004-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -40,18 +40,15 @@
 #include "FLAC/ordinals.h"
 
 /*
- * These typedefs make it easier to ensure that integer versions of
- * the library really only contain integer operations.  All the code
- * in libFLAC should use FLAC__float and FLAC__double in place of
- * float and double, and be protected by checks of the macro
+ * All the code in libFLAC that uses float and double
+ * should be protected by checks of the macro
  * FLAC__INTEGER_ONLY_LIBRARY.
  *
- * FLAC__real is the basic floating point type used in LPC analysis.
  */
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
-typedef double FLAC__double;
-typedef float FLAC__float;
 /*
+ * FLAC__real is the basic floating point type used in LPC analysis.
+ *
  * WATCHOUT: changing FLAC__real will change the signatures of many
  * functions that have assembly language equivalents and break them.
  */
@@ -84,14 +81,16 @@
  *	be < 32 and evenly divisible by 4 (0 is OK but not very precise).
  *
  *	'precision' roughly limits the number of iterations that are done;
- *	use (unsigned)(-1) for maximum precision.
+ *	use (uint32_t)(-1) for maximum precision.
  *
  *	If 'x' is less than one -- that is, x < (1<<fracbits) -- then this
  *	function will punt and return 0.
  *
  *	The return value will also have 'fracbits' fractional bits.
  */
-FLAC__uint32 FLAC__fixedpoint_log2(FLAC__uint32 x, unsigned fracbits, unsigned precision);
+FLAC__uint32 FLAC__fixedpoint_log2(FLAC__uint32 x,
+                                   uint32_t fracbits,
+                                   uint32_t precision);
 
 #endif
 

diff --git a/src/libFLAC/include/private/format.h b/src/libFLAC/include/private/format.h
index 2fd3460..be258e0 100644
--- a/src/libFLAC/include/private/format.h
+++ b/src/libFLAC/include/private/format.h

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -35,11 +35,20 @@
 
 #include "FLAC/format.h"
 
-unsigned FLAC__format_get_max_rice_partition_order(unsigned blocksize, unsigned predictor_order);
-unsigned FLAC__format_get_max_rice_partition_order_from_blocksize(unsigned blocksize);
-unsigned FLAC__format_get_max_rice_partition_order_from_blocksize_limited_max_and_predictor_order(unsigned limit, unsigned blocksize, unsigned predictor_order);
+uint32_t FLAC__format_get_max_rice_partition_order(uint32_t blocksize,
+                                                   uint32_t predictor_order);
+uint32_t FLAC__format_get_max_rice_partition_order_from_blocksize(
+    uint32_t blocksize);
+uint32_t
+FLAC__format_get_max_rice_partition_order_from_blocksize_limited_max_and_predictor_order(
+    uint32_t limit,
+    uint32_t blocksize,
+    uint32_t predictor_order);
 void FLAC__format_entropy_coding_method_partitioned_rice_contents_init(FLAC__EntropyCodingMethod_PartitionedRiceContents *object);
 void FLAC__format_entropy_coding_method_partitioned_rice_contents_clear(FLAC__EntropyCodingMethod_PartitionedRiceContents *object);
-FLAC__bool FLAC__format_entropy_coding_method_partitioned_rice_contents_ensure_size(FLAC__EntropyCodingMethod_PartitionedRiceContents *object, unsigned max_partition_order);
+FLAC__bool
+FLAC__format_entropy_coding_method_partitioned_rice_contents_ensure_size(
+    FLAC__EntropyCodingMethod_PartitionedRiceContents* object,
+    uint32_t max_partition_order);
 
 #endif

diff --git a/src/libFLAC/include/private/lpc.h b/src/libFLAC/include/private/lpc.h
index d36b30b..cd1a0df 100644
--- a/src/libFLAC/include/private/lpc.h
+++ b/src/libFLAC/include/private/lpc.h

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -54,7 +54,26 @@
  *	OUT out[0,lag-1]
  *	IN data_len
  */
-void FLAC__lpc_window_data(const FLAC__int32 in[], const FLAC__real window[], FLAC__real out[], unsigned data_len);
+void FLAC__lpc_window_data(const FLAC__int32 in[],
+                           const FLAC__real window[],
+                           FLAC__real out[],
+                           uint32_t data_len);
+void FLAC__lpc_window_data_wide(const FLAC__int64 in[],
+                                const FLAC__real window[],
+                                FLAC__real out[],
+                                uint32_t data_len);
+void FLAC__lpc_window_data_partial(const FLAC__int32 in[],
+                                   const FLAC__real window[],
+                                   FLAC__real out[],
+                                   uint32_t data_len,
+                                   uint32_t part_size,
+                                   uint32_t data_shift);
+void FLAC__lpc_window_data_partial_wide(const FLAC__int64 in[],
+                                        const FLAC__real window[],
+                                        FLAC__real out[],
+                                        uint32_t data_len,
+                                        uint32_t part_size,
+                                        uint32_t data_shift);
 
 /*
  *	FLAC__lpc_compute_autocorrelation()
@@ -68,26 +87,102 @@
  *	IN 0 < lag <= data_len
  *	OUT autoc[0,lag-1]
  */
-void FLAC__lpc_compute_autocorrelation(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
+void FLAC__lpc_compute_autocorrelation(const FLAC__real data[],
+                                       uint32_t data_len,
+                                       uint32_t lag,
+                                       double autoc[]);
 #ifndef FLAC__NO_ASM
-#  ifdef FLAC__CPU_IA32
-#    ifdef FLAC__HAS_NASM
-void FLAC__lpc_compute_autocorrelation_asm_ia32(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
-void FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_4(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
-void FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_8(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
-void FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_12(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
-void FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_16(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
+#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN
+#ifdef FLAC__SSE2_SUPPORTED
+void FLAC__lpc_compute_autocorrelation_intrin_sse2_lag_8(
+    const FLAC__real data[],
+    uint32_t data_len,
+    uint32_t lag,
+    double autoc[]);
+void FLAC__lpc_compute_autocorrelation_intrin_sse2_lag_10(
+    const FLAC__real data[],
+    uint32_t data_len,
+    uint32_t lag,
+    double autoc[]);
+void FLAC__lpc_compute_autocorrelation_intrin_sse2_lag_14(
+    const FLAC__real data[],
+    uint32_t data_len,
+    uint32_t lag,
+    double autoc[]);
 #    endif
 #  endif
-#  if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
-#    ifdef FLAC__SSE_SUPPORTED
-void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
-void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
-void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
-void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
+#if defined FLAC__CPU_X86_64 && FLAC__HAS_X86INTRIN
+#ifdef FLAC__FMA_SUPPORTED
+void FLAC__lpc_compute_autocorrelation_intrin_fma_lag_8(const FLAC__real data[],
+                                                        uint32_t data_len,
+                                                        uint32_t lag,
+                                                        double autoc[]);
+void FLAC__lpc_compute_autocorrelation_intrin_fma_lag_12(
+    const FLAC__real data[],
+    uint32_t data_len,
+    uint32_t lag,
+    double autoc[]);
+void FLAC__lpc_compute_autocorrelation_intrin_fma_lag_16(
+    const FLAC__real data[],
+    uint32_t data_len,
+    uint32_t lag,
+    double autoc[]);
 #    endif
 #  endif
+#if defined(FLAC__CPU_PPC64) && defined(FLAC__USE_VSX)
+#ifdef FLAC__HAS_TARGET_POWER9
+void FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_8(
+    const FLAC__real data[],
+    uint32_t data_len,
+    uint32_t lag,
+    double autoc[]);
+void FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_10(
+    const FLAC__real data[],
+    uint32_t data_len,
+    uint32_t lag,
+    double autoc[]);
+void FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_14(
+    const FLAC__real data[],
+    uint32_t data_len,
+    uint32_t lag,
+    double autoc[]);
 #endif
+#ifdef FLAC__HAS_TARGET_POWER8
+void FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_8(
+    const FLAC__real data[],
+    uint32_t data_len,
+    uint32_t lag,
+    double autoc[]);
+void FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_10(
+    const FLAC__real data[],
+    uint32_t data_len,
+    uint32_t lag,
+    double autoc[]);
+void FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_14(
+    const FLAC__real data[],
+    uint32_t data_len,
+    uint32_t lag,
+    double autoc[]);
+#endif
+#endif
+#if defined FLAC__CPU_ARM64 && FLAC__HAS_NEONINTRIN && FLAC__HAS_A64NEONINTRIN
+void FLAC__lpc_compute_autocorrelation_intrin_neon_lag_8(
+    const FLAC__real data[],
+    uint32_t data_len,
+    uint32_t lag,
+    double autoc[]);
+void FLAC__lpc_compute_autocorrelation_intrin_neon_lag_10(
+    const FLAC__real data[],
+    uint32_t data_len,
+    uint32_t lag,
+    double autoc[]);
+void FLAC__lpc_compute_autocorrelation_intrin_neon_lag_14(
+    const FLAC__real data[],
+    uint32_t data_len,
+    uint32_t lag,
+    double autoc[]);
+#endif
+#endif /* FLAC__NO_ASM */
 
 /*
  *	FLAC__lpc_compute_lp_coefficients()
@@ -110,7 +205,11 @@
  *	         in lp_coeff[8][0,8], the LP coefficients for order 8 will be
  *			 in lp_coeff[7][0,7], etc.
  */
-void FLAC__lpc_compute_lp_coefficients(const FLAC__real autoc[], unsigned *max_order, FLAC__real lp_coeff[][FLAC__MAX_LPC_ORDER], FLAC__double error[]);
+void FLAC__lpc_compute_lp_coefficients(
+    const double autoc[],
+    uint32_t* max_order,
+    FLAC__real lp_coeff[][FLAC__MAX_LPC_ORDER],
+    double error[]);
 
 /*
  *	FLAC__lpc_quantize_coefficients()
@@ -132,7 +231,11 @@
  *         2 => coefficients are all zero, which is bad.  'shift' is
  *              unset.
  */
-int FLAC__lpc_quantize_coefficients(const FLAC__real lp_coeff[], unsigned order, unsigned precision, FLAC__int32 qlp_coeff[], int *shift);
+int FLAC__lpc_quantize_coefficients(const FLAC__real lp_coeff[],
+                                    uint32_t order,
+                                    uint32_t precision,
+                                    FLAC__int32 qlp_coeff[],
+                                    int* shift);
 
 /*
  *	FLAC__lpc_compute_residual_from_qlp_coefficients()
@@ -147,35 +250,123 @@
  *	IN lp_quantization         quantization of LP coefficients in bits
  *	OUT residual[0,data_len-1] residual signal
  */
-void FLAC__lpc_compute_residual_from_qlp_coefficients(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
-void FLAC__lpc_compute_residual_from_qlp_coefficients_wide(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
+void FLAC__lpc_compute_residual_from_qlp_coefficients(
+    const FLAC__int32* data,
+    uint32_t data_len,
+    const FLAC__int32 qlp_coeff[],
+    uint32_t order,
+    int lp_quantization,
+    FLAC__int32 residual[]);
+void FLAC__lpc_compute_residual_from_qlp_coefficients_wide(
+    const FLAC__int32* data,
+    uint32_t data_len,
+    const FLAC__int32 qlp_coeff[],
+    uint32_t order,
+    int lp_quantization,
+    FLAC__int32 residual[]);
+FLAC__bool FLAC__lpc_compute_residual_from_qlp_coefficients_limit_residual(
+    const FLAC__int32* data,
+    uint32_t data_len,
+    const FLAC__int32 qlp_coeff[],
+    uint32_t order,
+    int lp_quantization,
+    FLAC__int32 residual[]);
+FLAC__bool
+FLAC__lpc_compute_residual_from_qlp_coefficients_limit_residual_33bit(
+    const FLAC__int64* data,
+    uint32_t data_len,
+    const FLAC__int32 qlp_coeff[],
+    uint32_t order,
+    int lp_quantization,
+    FLAC__int32 residual[]);
 #ifndef FLAC__NO_ASM
-#  ifdef FLAC__CPU_IA32
-#    ifdef FLAC__HAS_NASM
-void FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
-void FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
-void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
-#    endif
-#  endif
-#  if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
+#ifdef FLAC__CPU_ARM64
+void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_neon(
+    const FLAC__int32* data,
+    uint32_t data_len,
+    const FLAC__int32 qlp_coeff[],
+    uint32_t order,
+    int lp_quantization,
+    FLAC__int32 residual[]);
+void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_neon(
+    const FLAC__int32* data,
+    uint32_t data_len,
+    const FLAC__int32 qlp_coeff[],
+    uint32_t order,
+    int lp_quantization,
+    FLAC__int32 residual[]);
+#endif
+
+#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN
 #    ifdef FLAC__SSE2_SUPPORTED
-void FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
-void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
+void FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2(
+    const FLAC__int32* data,
+    uint32_t data_len,
+    const FLAC__int32 qlp_coeff[],
+    uint32_t order,
+    int lp_quantization,
+    FLAC__int32 residual[]);
+void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2(
+    const FLAC__int32* data,
+    uint32_t data_len,
+    const FLAC__int32 qlp_coeff[],
+    uint32_t order,
+    int lp_quantization,
+    FLAC__int32 residual[]);
 #    endif
 #    ifdef FLAC__SSE4_1_SUPPORTED
-void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse41(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
-void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
+void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse41(
+    const FLAC__int32* data,
+    uint32_t data_len,
+    const FLAC__int32 qlp_coeff[],
+    uint32_t order,
+    int lp_quantization,
+    FLAC__int32 residual[]);
+void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41(
+    const FLAC__int32* data,
+    uint32_t data_len,
+    const FLAC__int32 qlp_coeff[],
+    uint32_t order,
+    int lp_quantization,
+    FLAC__int32 residual[]);
 #    endif
 #    ifdef FLAC__AVX2_SUPPORTED
-void FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
-void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
-void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
+void FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2(
+    const FLAC__int32* data,
+    uint32_t data_len,
+    const FLAC__int32 qlp_coeff[],
+    uint32_t order,
+    int lp_quantization,
+    FLAC__int32 residual[]);
+void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2(
+    const FLAC__int32* data,
+    uint32_t data_len,
+    const FLAC__int32 qlp_coeff[],
+    uint32_t order,
+    int lp_quantization,
+    FLAC__int32 residual[]);
+void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2(
+    const FLAC__int32* data,
+    uint32_t data_len,
+    const FLAC__int32 qlp_coeff[],
+    uint32_t order,
+    int lp_quantization,
+    FLAC__int32 residual[]);
 #    endif
 #  endif
 #endif
 
 #endif /* !defined FLAC__INTEGER_ONLY_LIBRARY */
 
+uint32_t FLAC__lpc_max_prediction_before_shift_bps(
+    uint32_t subframe_bps,
+    const FLAC__int32 qlp_coeff[],
+    uint32_t order);
+uint32_t FLAC__lpc_max_residual_bps(uint32_t subframe_bps,
+                                    const FLAC__int32 qlp_coeff[],
+                                    uint32_t order,
+                                    int lp_quantization);
+
 /*
  *	FLAC__lpc_restore_signal()
  *	--------------------------------------------------------------------
@@ -191,25 +382,24 @@
  *	IN  data[-order,-1]        previously-reconstructed historical samples
  *	OUT data[0,data_len-1]     original signal
  */
-void FLAC__lpc_restore_signal(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
-void FLAC__lpc_restore_signal_wide(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
-#ifndef FLAC__NO_ASM
-#  ifdef FLAC__CPU_IA32
-#    ifdef FLAC__HAS_NASM
-void FLAC__lpc_restore_signal_asm_ia32(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
-void FLAC__lpc_restore_signal_asm_ia32_mmx(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
-void FLAC__lpc_restore_signal_wide_asm_ia32(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
-#    endif /* FLAC__HAS_NASM */
-#  endif /* FLAC__CPU_IA32 */
-#  if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
-#    ifdef FLAC__SSE2_SUPPORTED
-void FLAC__lpc_restore_signal_16_intrin_sse2(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
-#    endif
-#    ifdef FLAC__SSE4_1_SUPPORTED
-void FLAC__lpc_restore_signal_wide_intrin_sse41(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
-#    endif
-#  endif
-#endif /* FLAC__NO_ASM */
+void FLAC__lpc_restore_signal(const FLAC__int32 residual[],
+                              uint32_t data_len,
+                              const FLAC__int32 qlp_coeff[],
+                              uint32_t order,
+                              int lp_quantization,
+                              FLAC__int32 data[]);
+void FLAC__lpc_restore_signal_wide(const FLAC__int32 residual[],
+                                   uint32_t data_len,
+                                   const FLAC__int32 qlp_coeff[],
+                                   uint32_t order,
+                                   int lp_quantization,
+                                   FLAC__int32 data[]);
+void FLAC__lpc_restore_signal_wide_33bit(const FLAC__int32 residual[],
+                                         uint32_t data_len,
+                                         const FLAC__int32 qlp_coeff[],
+                                         uint32_t order,
+                                         int lp_quantization,
+                                         FLAC__int64 data[]);
 
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
 
@@ -223,8 +413,12 @@
  *	IN total_samples > 0  # of samples in residual signal
  *	RETURN                expected bits per sample
  */
-FLAC__double FLAC__lpc_compute_expected_bits_per_residual_sample(FLAC__double lpc_error, unsigned total_samples);
-FLAC__double FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scale(FLAC__double lpc_error, FLAC__double error_scale);
+double FLAC__lpc_compute_expected_bits_per_residual_sample(
+    double lpc_error,
+    uint32_t total_samples);
+double FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scale(
+    double lpc_error,
+    double error_scale);
 
 /*
  *	FLAC__lpc_compute_best_order()
@@ -239,7 +433,10 @@
  *	                                    (includes warmup sample size and quantized LP coefficient)
  *	RETURN [1,max_order]                best order
  */
-unsigned FLAC__lpc_compute_best_order(const FLAC__double lpc_error[], unsigned max_order, unsigned total_samples, unsigned overhead_bits_per_order);
+uint32_t FLAC__lpc_compute_best_order(const double lpc_error[],
+                                      uint32_t max_order,
+                                      uint32_t total_samples,
+                                      uint32_t overhead_bits_per_order);
 
 #endif /* !defined FLAC__INTEGER_ONLY_LIBRARY */
 

diff --git a/src/libFLAC/include/private/macros.h b/src/libFLAC/include/private/macros.h
index 0eed2d3..6a6e7de 100644
--- a/src/libFLAC/include/private/macros.h
+++ b/src/libFLAC/include/private/macros.h

@@ -1,5 +1,5 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2012-2014  Xiph.org Foundation
+ * Copyright (C) 2012-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -32,27 +32,34 @@
 #ifndef FLAC__PRIVATE__MACROS_H
 #define FLAC__PRIVATE__MACROS_H
 
-#if defined(__GNUC__)
+#if defined(__GNUC__) && \
+    (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))
 
-#define flac_max(a,b) \
-    ({ __typeof__ (a) _a = (a); \
-    __typeof__ (b) _b = (b); \
-    _a > _b ? _a : _b; })
+#define flac_max(a, b)      \
+  ({                        \
+    __typeof__(a) _a = (a); \
+    __typeof__(b) _b = (b); \
+    _a > _b ? _a : _b;      \
+  })
 
 #define MIN_PASTE(A,B) A##B
-#define MIN_IMPL(A,B,L) ({ \
-    __typeof__(A) MIN_PASTE(__a,L) = (A); \
-    __typeof__(B) MIN_PASTE(__b,L) = (B); \
-    MIN_PASTE(__a,L) < MIN_PASTE(__b,L) ? MIN_PASTE(__a,L) : MIN_PASTE(__b,L); \
-    })
+#define MIN_IMPL(A, B, L)                                      \
+  ({                                                           \
+    __typeof__(A) MIN_PASTE(__a, L) = (A);                     \
+    __typeof__(B) MIN_PASTE(__b, L) = (B);                     \
+    MIN_PASTE(__a, L) < MIN_PASTE(__b, L) ? MIN_PASTE(__a, L)  \
+                                          : MIN_PASTE(__b, L); \
+  })
 
 #define flac_min(A,B) MIN_IMPL(A,B,__COUNTER__)
 
 /* Whatever other unix that has sys/param.h */
 #elif defined(HAVE_SYS_PARAM_H)
 #include <sys/param.h>
+#if defined(MIN) && defined(MAX)
 #define flac_max(a,b) MAX(a,b)
 #define flac_min(a,b) MIN(a,b)
+#endif
 
 /* Windows VS has them in stdlib.h.. XXX:Untested */
 #elif defined(_MSC_VER)
@@ -61,12 +68,12 @@
 #define flac_min(a,b) __min(a,b)
 #endif
 
-#ifndef MIN
-#define MIN(x,y)	((x) <= (y) ? (x) : (y))
+#ifndef flac_min
+#define flac_min(x, y) ((x) <= (y) ? (x) : (y))
 #endif
 
-#ifndef MAX
-#define MAX(x,y)	((x) >= (y) ? (x) : (y))
+#ifndef flac_max
+#define flac_max(x, y) ((x) >= (y) ? (x) : (y))
 #endif
 
 #endif

diff --git a/src/libFLAC/include/private/md5.h b/src/libFLAC/include/private/md5.h
index c665ab3..b8c13fd 100644
--- a/src/libFLAC/include/private/md5.h
+++ b/src/libFLAC/include/private/md5.h

@@ -45,6 +45,10 @@
 void FLAC__MD5Init(FLAC__MD5Context *context);
 void FLAC__MD5Final(FLAC__byte digest[16], FLAC__MD5Context *context);
 
-FLAC__bool FLAC__MD5Accumulate(FLAC__MD5Context *ctx, const FLAC__int32 * const signal[], unsigned channels, unsigned samples, unsigned bytes_per_sample);
+FLAC__bool FLAC__MD5Accumulate(FLAC__MD5Context* ctx,
+                               const FLAC__int32* const signal[],
+                               uint32_t channels,
+                               uint32_t samples,
+                               uint32_t bytes_per_sample);
 
 #endif

diff --git a/src/libFLAC/include/private/memory.h b/src/libFLAC/include/private/memory.h
index c9f2712..4babf7d 100644
--- a/src/libFLAC/include/private/memory.h
+++ b/src/libFLAC/include/private/memory.h

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2001-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -48,8 +48,15 @@
 void *FLAC__memory_alloc_aligned(size_t bytes, void **aligned_address);
 FLAC__bool FLAC__memory_alloc_aligned_int32_array(size_t elements, FLAC__int32 **unaligned_pointer, FLAC__int32 **aligned_pointer);
 FLAC__bool FLAC__memory_alloc_aligned_uint32_array(size_t elements, FLAC__uint32 **unaligned_pointer, FLAC__uint32 **aligned_pointer);
+FLAC__bool FLAC__memory_alloc_aligned_int64_array(
+    size_t elements,
+    FLAC__int64** unaligned_pointer,
+    FLAC__int64** aligned_pointer);
 FLAC__bool FLAC__memory_alloc_aligned_uint64_array(size_t elements, FLAC__uint64 **unaligned_pointer, FLAC__uint64 **aligned_pointer);
-FLAC__bool FLAC__memory_alloc_aligned_unsigned_array(size_t elements, unsigned **unaligned_pointer, unsigned **aligned_pointer);
+FLAC__bool FLAC__memory_alloc_aligned_unsigned_array(
+    size_t elements,
+    uint32_t** unaligned_pointer,
+    uint32_t** aligned_pointer);
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
 FLAC__bool FLAC__memory_alloc_aligned_real_array(size_t elements, FLAC__real **unaligned_pointer, FLAC__real **aligned_pointer);
 #endif

diff --git a/src/libFLAC/include/private/metadata.h b/src/libFLAC/include/private/metadata.h
index 092764c..409b62c 100644
--- a/src/libFLAC/include/private/metadata.h
+++ b/src/libFLAC/include/private/metadata.h

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2002-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions

diff --git a/src/libFLAC/include/private/stream_encoder.h b/src/libFLAC/include/private/stream_encoder.h
index 96d3135..1477706 100644
--- a/src/libFLAC/include/private/stream_encoder.h
+++ b/src/libFLAC/include/private/stream_encoder.h

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -48,18 +48,36 @@
 #include "FLAC/format.h"
 
 #ifdef FLAC__SSE2_SUPPORTED
-extern void FLAC__precompute_partition_info_sums_intrin_sse2(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
-			unsigned residual_samples, unsigned predictor_order, unsigned min_partition_order, unsigned max_partition_order, unsigned bps);
+extern void FLAC__precompute_partition_info_sums_intrin_sse2(
+    const FLAC__int32 residual[],
+    FLAC__uint64 abs_residual_partition_sums[],
+    uint32_t residual_samples,
+    uint32_t predictor_order,
+    uint32_t min_partition_order,
+    uint32_t max_partition_order,
+    uint32_t bps);
 #endif
 
 #ifdef FLAC__SSSE3_SUPPORTED
-extern void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
-			unsigned residual_samples, unsigned predictor_order, unsigned min_partition_order, unsigned max_partition_order, unsigned bps);
+extern void FLAC__precompute_partition_info_sums_intrin_ssse3(
+    const FLAC__int32 residual[],
+    FLAC__uint64 abs_residual_partition_sums[],
+    uint32_t residual_samples,
+    uint32_t predictor_order,
+    uint32_t min_partition_order,
+    uint32_t max_partition_order,
+    uint32_t bps);
 #endif
 
 #ifdef FLAC__AVX2_SUPPORTED
-extern void FLAC__precompute_partition_info_sums_intrin_avx2(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
-			unsigned residual_samples, unsigned predictor_order, unsigned min_partition_order, unsigned max_partition_order, unsigned bps);
+extern void FLAC__precompute_partition_info_sums_intrin_avx2(
+    const FLAC__int32 residual[],
+    FLAC__uint64 abs_residual_partition_sums[],
+    uint32_t residual_samples,
+    uint32_t predictor_order,
+    uint32_t min_partition_order,
+    uint32_t max_partition_order,
+    uint32_t bps);
 #endif
 
 #endif

diff --git a/src/libFLAC/include/private/stream_encoder_framing.h b/src/libFLAC/include/private/stream_encoder_framing.h
index 2b7387a..5b14324 100644
--- a/src/libFLAC/include/private/stream_encoder_framing.h
+++ b/src/libFLAC/include/private/stream_encoder_framing.h

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -38,9 +38,24 @@
 
 FLAC__bool FLAC__add_metadata_block(const FLAC__StreamMetadata *metadata, FLAC__BitWriter *bw);
 FLAC__bool FLAC__frame_add_header(const FLAC__FrameHeader *header, FLAC__BitWriter *bw);
-FLAC__bool FLAC__subframe_add_constant(const FLAC__Subframe_Constant *subframe, unsigned subframe_bps, unsigned wasted_bits, FLAC__BitWriter *bw);
-FLAC__bool FLAC__subframe_add_fixed(const FLAC__Subframe_Fixed *subframe, unsigned residual_samples, unsigned subframe_bps, unsigned wasted_bits, FLAC__BitWriter *bw);
-FLAC__bool FLAC__subframe_add_lpc(const FLAC__Subframe_LPC *subframe, unsigned residual_samples, unsigned subframe_bps, unsigned wasted_bits, FLAC__BitWriter *bw);
-FLAC__bool FLAC__subframe_add_verbatim(const FLAC__Subframe_Verbatim *subframe, unsigned samples, unsigned subframe_bps, unsigned wasted_bits, FLAC__BitWriter *bw);
+FLAC__bool FLAC__subframe_add_constant(const FLAC__Subframe_Constant* subframe,
+                                       uint32_t subframe_bps,
+                                       uint32_t wasted_bits,
+                                       FLAC__BitWriter* bw);
+FLAC__bool FLAC__subframe_add_fixed(const FLAC__Subframe_Fixed* subframe,
+                                    uint32_t residual_samples,
+                                    uint32_t subframe_bps,
+                                    uint32_t wasted_bits,
+                                    FLAC__BitWriter* bw);
+FLAC__bool FLAC__subframe_add_lpc(const FLAC__Subframe_LPC* subframe,
+                                  uint32_t residual_samples,
+                                  uint32_t subframe_bps,
+                                  uint32_t wasted_bits,
+                                  FLAC__BitWriter* bw);
+FLAC__bool FLAC__subframe_add_verbatim(const FLAC__Subframe_Verbatim* subframe,
+                                       uint32_t samples,
+                                       uint32_t subframe_bps,
+                                       uint32_t wasted_bits,
+                                       FLAC__BitWriter* bw);
 
 #endif

diff --git a/src/libFLAC/include/private/window.h b/src/libFLAC/include/private/window.h
index 52c9262..4aaaaea 100644
--- a/src/libFLAC/include/private/window.h
+++ b/src/libFLAC/include/private/window.h

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2006-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions

diff --git a/src/libFLAC/include/protected/all.h b/src/libFLAC/include/protected/all.h
index 90912af..ad30230 100644
--- a/src/libFLAC/include/protected/all.h
+++ b/src/libFLAC/include/protected/all.h

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2001-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions

diff --git a/src/libFLAC/include/protected/stream_decoder.h b/src/libFLAC/include/protected/stream_decoder.h
index 7be95af..02e4d39 100644
--- a/src/libFLAC/include/protected/stream_decoder.h
+++ b/src/libFLAC/include/protected/stream_decoder.h

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -41,20 +41,23 @@
 typedef struct FLAC__StreamDecoderProtected {
 	FLAC__StreamDecoderState state;
 	FLAC__StreamDecoderInitStatus initstate;
-	unsigned channels;
-	FLAC__ChannelAssignment channel_assignment;
-	unsigned bits_per_sample;
-	unsigned sample_rate; /* in Hz */
-	unsigned blocksize; /* in samples (per channel) */
-	FLAC__bool md5_checking; /* if true, generate MD5 signature of decoded data and compare against signature in the STREAMINFO metadata block */
+        uint32_t channels;
+        FLAC__ChannelAssignment channel_assignment;
+        uint32_t bits_per_sample;
+        uint32_t sample_rate;    /* in Hz */
+        uint32_t blocksize;      /* in samples (per channel) */
+        FLAC__bool md5_checking; /* if true, generate MD5 signature of decoded
+                                    data and compare against signature in the
+                                    STREAMINFO metadata block */
 #if FLAC__HAS_OGG
 	FLAC__OggDecoderAspect ogg_decoder_aspect;
 #endif
 } FLAC__StreamDecoderProtected;
 
 /*
- * return the number of input bytes consumed
+ * Return the number of input bytes not yet consumed
  */
-unsigned FLAC__stream_decoder_get_input_bytes_unconsumed(const FLAC__StreamDecoder *decoder);
+uint32_t FLAC__stream_decoder_get_input_bytes_unconsumed(
+    const FLAC__StreamDecoder* decoder);
 
 #endif

diff --git a/src/libFLAC/include/protected/stream_encoder.h b/src/libFLAC/include/protected/stream_encoder.h
index 6917f5d..a94d830 100644
--- a/src/libFLAC/include/protected/stream_encoder.h
+++ b/src/libFLAC/include/protected/stream_encoder.h

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2001-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -45,23 +45,24 @@
 #define FLAC__MAX_APODIZATION_FUNCTIONS 32
 
 typedef enum {
-	FLAC__APODIZATION_BARTLETT,
-	FLAC__APODIZATION_BARTLETT_HANN,
-	FLAC__APODIZATION_BLACKMAN,
-	FLAC__APODIZATION_BLACKMAN_HARRIS_4TERM_92DB_SIDELOBE,
-	FLAC__APODIZATION_CONNES,
-	FLAC__APODIZATION_FLATTOP,
-	FLAC__APODIZATION_GAUSS,
-	FLAC__APODIZATION_HAMMING,
-	FLAC__APODIZATION_HANN,
-	FLAC__APODIZATION_KAISER_BESSEL,
-	FLAC__APODIZATION_NUTTALL,
-	FLAC__APODIZATION_RECTANGLE,
-	FLAC__APODIZATION_TRIANGLE,
-	FLAC__APODIZATION_TUKEY,
-	FLAC__APODIZATION_PARTIAL_TUKEY,
-	FLAC__APODIZATION_PUNCHOUT_TUKEY,
-	FLAC__APODIZATION_WELCH
+  FLAC__APODIZATION_BARTLETT,
+  FLAC__APODIZATION_BARTLETT_HANN,
+  FLAC__APODIZATION_BLACKMAN,
+  FLAC__APODIZATION_BLACKMAN_HARRIS_4TERM_92DB_SIDELOBE,
+  FLAC__APODIZATION_CONNES,
+  FLAC__APODIZATION_FLATTOP,
+  FLAC__APODIZATION_GAUSS,
+  FLAC__APODIZATION_HAMMING,
+  FLAC__APODIZATION_HANN,
+  FLAC__APODIZATION_KAISER_BESSEL,
+  FLAC__APODIZATION_NUTTALL,
+  FLAC__APODIZATION_RECTANGLE,
+  FLAC__APODIZATION_TRIANGLE,
+  FLAC__APODIZATION_TUKEY,
+  FLAC__APODIZATION_PARTIAL_TUKEY,
+  FLAC__APODIZATION_PUNCHOUT_TUKEY,
+  FLAC__APODIZATION_SUBDIVIDE_TUKEY,
+  FLAC__APODIZATION_WELCH
 } FLAC__ApodizationFunction;
 
 typedef struct {
@@ -78,7 +79,11 @@
 			FLAC__real start;
 			FLAC__real end;
 		} multiple_tukey;
-	} parameters;
+                struct {
+                        FLAC__real p;
+                        FLAC__int32 parts;
+                } subdivide_tukey;
+        } parameters;
 } FLAC__ApodizationSpecification;
 
 #endif // #ifndef FLAC__INTEGER_ONLY_LIBRARY
@@ -90,26 +95,28 @@
 	FLAC__bool do_md5;
 	FLAC__bool do_mid_side_stereo;
 	FLAC__bool loose_mid_side_stereo;
-	unsigned channels;
-	unsigned bits_per_sample;
-	unsigned sample_rate;
-	unsigned blocksize;
+        uint32_t channels;
+        uint32_t bits_per_sample;
+        uint32_t sample_rate;
+        uint32_t blocksize;
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
-	unsigned num_apodizations;
-	FLAC__ApodizationSpecification apodizations[FLAC__MAX_APODIZATION_FUNCTIONS];
+        uint32_t num_apodizations;
+        FLAC__ApodizationSpecification
+            apodizations[FLAC__MAX_APODIZATION_FUNCTIONS];
 #endif
-	unsigned max_lpc_order;
-	unsigned qlp_coeff_precision;
-	FLAC__bool do_qlp_coeff_prec_search;
-	FLAC__bool do_exhaustive_model_search;
+        uint32_t max_lpc_order;
+        uint32_t qlp_coeff_precision;
+        FLAC__bool do_qlp_coeff_prec_search;
+        FLAC__bool do_exhaustive_model_search;
 	FLAC__bool do_escape_coding;
-	unsigned min_residual_partition_order;
-	unsigned max_residual_partition_order;
-	unsigned rice_parameter_search_dist;
-	FLAC__uint64 total_samples_estimate;
-	FLAC__StreamMetadata **metadata;
-	unsigned num_metadata_blocks;
-	FLAC__uint64 streaminfo_offset, seektable_offset, audio_offset;
+        uint32_t min_residual_partition_order;
+        uint32_t max_residual_partition_order;
+        uint32_t rice_parameter_search_dist;
+        FLAC__uint64 total_samples_estimate;
+        FLAC__bool limit_min_bitrate;
+        FLAC__StreamMetadata** metadata;
+        uint32_t num_metadata_blocks;
+        FLAC__uint64 streaminfo_offset, seektable_offset, audio_offset;
 #if FLAC__HAS_OGG
 	FLAC__OggEncoderAspect ogg_encoder_aspect;
 #endif

diff --git a/src/libFLAC/lpc.c b/src/libFLAC/lpc.c
index 843ee00..9d9c7e3 100644
--- a/src/libFLAC/lpc.c
+++ b/src/libFLAC/lpc.c

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -35,6 +35,7 @@
 #endif
 
 #include <math.h>
+#include <stdlib.h>
 
 #include "FLAC/assert.h"
 #include "FLAC/format.h"
@@ -42,7 +43,8 @@
 #include "private/bitmath.h"
 #include "private/lpc.h"
 #include "private/macros.h"
-#if defined DEBUG || defined FLAC__OVERFLOW_DETECT || defined FLAC__OVERFLOW_DETECT_VERBOSE
+
+#if !defined(NDEBUG) || defined FLAC__OVERFLOW_DETECT || defined FLAC__OVERFLOW_DETECT_VERBOSE
 #include <stdio.h>
 #endif
 
@@ -51,32 +53,66 @@
 
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
 
-#if !defined(HAVE_LROUND)
-#if defined(_MSC_VER)
+#if defined(_MSC_VER) && (_MSC_VER < 1800)
 #include <float.h>
-#define copysign _copysign
-#elif defined(__GNUC__)
-#define copysign __builtin_copysign
-#endif
 static inline long int lround(double x) {
-    return (long)(x + copysign (0.5, x));
+	return (long)(x + _copysign(0.5, x));
+}
+#elif !defined(HAVE_LROUND) && defined(__GNUC__)
+static inline long int lround(double x) {
+	return (long)(x + __builtin_copysign(0.5, x));
 }
 /* If this fails, we are in the presence of a mid 90's compiler, move along... */
 #endif
 
-void FLAC__lpc_window_data(const FLAC__int32 in[], const FLAC__real window[], FLAC__real out[], unsigned data_len)
+void FLAC__lpc_window_data(const FLAC__int32 in[], const FLAC__real window[], FLAC__real out[], uint32_t data_len)
 {
-	unsigned i;
+	uint32_t i;
 	for(i = 0; i < data_len; i++)
 		out[i] = in[i] * window[i];
 }
 
-void FLAC__lpc_compute_autocorrelation(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
+void FLAC__lpc_window_data_wide(const FLAC__int64 in[], const FLAC__real window[], FLAC__real out[], uint32_t data_len)
+{
+	uint32_t i;
+	for(i = 0; i < data_len; i++)
+		out[i] = in[i] * window[i];
+}
+
+void FLAC__lpc_window_data_partial(const FLAC__int32 in[], const FLAC__real window[], FLAC__real out[], uint32_t data_len, uint32_t part_size, uint32_t data_shift)
+{
+	uint32_t i, j;
+	if((part_size + data_shift) < data_len){
+		for(i = 0; i < part_size; i++)
+			out[i] = in[data_shift+i] * window[i];
+		i = flac_min(i,data_len - part_size - data_shift);
+		for(j = data_len - part_size; j < data_len; i++, j++)
+			out[i] = in[data_shift+i] * window[j];
+		if(i < data_len)
+			out[i] = 0.0f;
+	}
+}
+
+void FLAC__lpc_window_data_partial_wide(const FLAC__int64 in[], const FLAC__real window[], FLAC__real out[], uint32_t data_len, uint32_t part_size, uint32_t data_shift)
+{
+	uint32_t i, j;
+	if((part_size + data_shift) < data_len){
+		for(i = 0; i < part_size; i++)
+			out[i] = in[data_shift+i] * window[i];
+		i = flac_min(i,data_len - part_size - data_shift);
+		for(j = data_len - part_size; j < data_len; i++, j++)
+			out[i] = in[data_shift+i] * window[j];
+		if(i < data_len)
+			out[i] = 0.0f;
+	}
+}
+
+void FLAC__lpc_compute_autocorrelation(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[])
 {
 	/* a readable, but slower, version */
 #if 0
-	FLAC__real d;
-	unsigned i;
+	double d;
+	uint32_t i;
 
 	FLAC__ASSERT(lag > 0);
 	FLAC__ASSERT(lag <= data_len);
@@ -90,40 +126,57 @@
 	 */
 	while(lag--) {
 		for(i = lag, d = 0.0; i < data_len; i++)
-			d += data[i] * data[i - lag];
+			d += data[i] * (double)data[i - lag];
 		autoc[lag] = d;
 	}
 #endif
+	if (data_len < FLAC__MAX_LPC_ORDER || lag > 16) {
+		/*
+		 * this version tends to run faster because of better data locality
+		 * ('data_len' is usually much larger than 'lag')
+		 */
+		double d;
+		uint32_t sample, coeff;
+		const uint32_t limit = data_len - lag;
 
-	/*
-	 * this version tends to run faster because of better data locality
-	 * ('data_len' is usually much larger than 'lag')
-	 */
-	FLAC__real d;
-	unsigned sample, coeff;
-	const unsigned limit = data_len - lag;
+		FLAC__ASSERT(lag > 0);
+		FLAC__ASSERT(lag <= data_len);
 
-	FLAC__ASSERT(lag > 0);
-	FLAC__ASSERT(lag <= data_len);
-
-	for(coeff = 0; coeff < lag; coeff++)
-		autoc[coeff] = 0.0;
-	for(sample = 0; sample <= limit; sample++) {
-		d = data[sample];
 		for(coeff = 0; coeff < lag; coeff++)
-			autoc[coeff] += d * data[sample+coeff];
+			autoc[coeff] = 0.0;
+		for(sample = 0; sample <= limit; sample++) {
+			d = data[sample];
+			for(coeff = 0; coeff < lag; coeff++)
+				autoc[coeff] += d * data[sample+coeff];
+		}
+		for(; sample < data_len; sample++) {
+			d = data[sample];
+			for(coeff = 0; coeff < data_len - sample; coeff++)
+				autoc[coeff] += d * data[sample+coeff];
+		}
 	}
-	for(; sample < data_len; sample++) {
-		d = data[sample];
-		for(coeff = 0; coeff < data_len - sample; coeff++)
-			autoc[coeff] += d * data[sample+coeff];
+	else if(lag <= 8) {
+		#undef MAX_LAG
+		#define MAX_LAG 8
+		#include "deduplication/lpc_compute_autocorrelation_intrin.c"
 	}
+	else if(lag <= 12) {
+		#undef MAX_LAG
+		#define MAX_LAG 12
+		#include "deduplication/lpc_compute_autocorrelation_intrin.c"
+	}
+	else if(lag <= 16) {
+		#undef MAX_LAG
+		#define MAX_LAG 16
+		#include "deduplication/lpc_compute_autocorrelation_intrin.c"
+	}
+
 }
 
-void FLAC__lpc_compute_lp_coefficients(const FLAC__real autoc[], unsigned *max_order, FLAC__real lp_coeff[][FLAC__MAX_LPC_ORDER], FLAC__double error[])
+void FLAC__lpc_compute_lp_coefficients(const double autoc[], uint32_t *max_order, FLAC__real lp_coeff[][FLAC__MAX_LPC_ORDER], double error[])
 {
-	unsigned i, j;
-	FLAC__double r, err, lpc[FLAC__MAX_LPC_ORDER];
+	uint32_t i, j;
+	double r, err, lpc[FLAC__MAX_LPC_ORDER];
 
 	FLAC__ASSERT(0 != max_order);
 	FLAC__ASSERT(0 < *max_order);
@@ -142,7 +195,7 @@
 		/* Update LPC coefficients and total error. */
 		lpc[i]=r;
 		for(j = 0; j < (i>>1); j++) {
-			FLAC__double tmp = lpc[j];
+			double tmp = lpc[j];
 			lpc[j] += r * lpc[i-1-j];
 			lpc[i-1-j] += r * tmp;
 		}
@@ -164,10 +217,10 @@
 	}
 }
 
-int FLAC__lpc_quantize_coefficients(const FLAC__real lp_coeff[], unsigned order, unsigned precision, FLAC__int32 qlp_coeff[], int *shift)
+int FLAC__lpc_quantize_coefficients(const FLAC__real lp_coeff[], uint32_t order, uint32_t precision, FLAC__int32 qlp_coeff[], int *shift)
 {
-	unsigned i;
-	FLAC__double cmax;
+	uint32_t i;
+	double cmax;
 	FLAC__int32 qmax, qmin;
 
 	FLAC__ASSERT(precision > 0);
@@ -182,7 +235,7 @@
 	/* calc cmax = max( |lp_coeff[i]| ) */
 	cmax = 0.0;
 	for(i = 0; i < order; i++) {
-		const FLAC__double d = fabs(lp_coeff[i]);
+		const double d = fabs(lp_coeff[i]);
 		if(d > cmax)
 			cmax = d;
 	}
@@ -207,7 +260,7 @@
 	}
 
 	if(*shift >= 0) {
-		FLAC__double error = 0.0;
+		double error = 0.0;
 		FLAC__int32 q;
 		for(i = 0; i < order; i++) {
 			error += lp_coeff[i] * (1 << *shift);
@@ -228,14 +281,14 @@
 		}
 	}
 	/* negative shift is very rare but due to design flaw, negative shift is
-	 * a NOP in the decoder, so it must be handled specially by scaling down
-	 * coeffs
+	 * not allowed in the decoder, so it must be handled specially by scaling
+	 * down coeffs
 	 */
 	else {
 		const int nshift = -(*shift);
-		FLAC__double error = 0.0;
+		double error = 0.0;
 		FLAC__int32 q;
-#ifdef DEBUG
+#ifndef NDEBUG
 		fprintf(stderr,"FLAC__lpc_quantize_coefficients: negative shift=%d order=%u cmax=%f\n", *shift, order, cmax);
 #endif
 		for(i = 0; i < order; i++) {
@@ -265,11 +318,11 @@
 #pragma warning ( disable : 4028 )
 #endif
 
-void FLAC__lpc_compute_residual_from_qlp_coefficients(const FLAC__int32 * flac_restrict data, unsigned data_len, const FLAC__int32 * flac_restrict qlp_coeff, unsigned order, int lp_quantization, FLAC__int32 * flac_restrict residual)
+void FLAC__lpc_compute_residual_from_qlp_coefficients(const FLAC__int32 * flac_restrict data, uint32_t data_len, const FLAC__int32 * flac_restrict qlp_coeff, uint32_t order, int lp_quantization, FLAC__int32 * flac_restrict residual)
 #if defined(FLAC__OVERFLOW_DETECT) || !defined(FLAC__LPC_UNROLLED_FILTER_LOOPS)
 {
 	FLAC__int64 sumo;
-	unsigned i, j;
+	uint32_t i, j;
 	FLAC__int32 sum;
 	const FLAC__int32 *history;
 
@@ -487,25 +540,25 @@
 		for(i = 0; i < (int)data_len; i++) {
 			sum = 0;
 			switch(order) {
-				case 32: sum += qlp_coeff[31] * data[i-32];
-				case 31: sum += qlp_coeff[30] * data[i-31];
-				case 30: sum += qlp_coeff[29] * data[i-30];
-				case 29: sum += qlp_coeff[28] * data[i-29];
-				case 28: sum += qlp_coeff[27] * data[i-28];
-				case 27: sum += qlp_coeff[26] * data[i-27];
-				case 26: sum += qlp_coeff[25] * data[i-26];
-				case 25: sum += qlp_coeff[24] * data[i-25];
-				case 24: sum += qlp_coeff[23] * data[i-24];
-				case 23: sum += qlp_coeff[22] * data[i-23];
-				case 22: sum += qlp_coeff[21] * data[i-22];
-				case 21: sum += qlp_coeff[20] * data[i-21];
-				case 20: sum += qlp_coeff[19] * data[i-20];
-				case 19: sum += qlp_coeff[18] * data[i-19];
-				case 18: sum += qlp_coeff[17] * data[i-18];
-				case 17: sum += qlp_coeff[16] * data[i-17];
-				case 16: sum += qlp_coeff[15] * data[i-16];
-				case 15: sum += qlp_coeff[14] * data[i-15];
-				case 14: sum += qlp_coeff[13] * data[i-14];
+				case 32: sum += qlp_coeff[31] * data[i-32]; /* Falls through. */
+				case 31: sum += qlp_coeff[30] * data[i-31]; /* Falls through. */
+				case 30: sum += qlp_coeff[29] * data[i-30]; /* Falls through. */
+				case 29: sum += qlp_coeff[28] * data[i-29]; /* Falls through. */
+				case 28: sum += qlp_coeff[27] * data[i-28]; /* Falls through. */
+				case 27: sum += qlp_coeff[26] * data[i-27]; /* Falls through. */
+				case 26: sum += qlp_coeff[25] * data[i-26]; /* Falls through. */
+				case 25: sum += qlp_coeff[24] * data[i-25]; /* Falls through. */
+				case 24: sum += qlp_coeff[23] * data[i-24]; /* Falls through. */
+				case 23: sum += qlp_coeff[22] * data[i-23]; /* Falls through. */
+				case 22: sum += qlp_coeff[21] * data[i-22]; /* Falls through. */
+				case 21: sum += qlp_coeff[20] * data[i-21]; /* Falls through. */
+				case 20: sum += qlp_coeff[19] * data[i-20]; /* Falls through. */
+				case 19: sum += qlp_coeff[18] * data[i-19]; /* Falls through. */
+				case 18: sum += qlp_coeff[17] * data[i-18]; /* Falls through. */
+				case 17: sum += qlp_coeff[16] * data[i-17]; /* Falls through. */
+				case 16: sum += qlp_coeff[15] * data[i-16]; /* Falls through. */
+				case 15: sum += qlp_coeff[14] * data[i-15]; /* Falls through. */
+				case 14: sum += qlp_coeff[13] * data[i-14]; /* Falls through. */
 				case 13: sum += qlp_coeff[12] * data[i-13];
 				         sum += qlp_coeff[11] * data[i-12];
 				         sum += qlp_coeff[10] * data[i-11];
@@ -526,10 +579,10 @@
 }
 #endif
 
-void FLAC__lpc_compute_residual_from_qlp_coefficients_wide(const FLAC__int32 * flac_restrict data, unsigned data_len, const FLAC__int32 * flac_restrict qlp_coeff, unsigned order, int lp_quantization, FLAC__int32 * flac_restrict residual)
+void FLAC__lpc_compute_residual_from_qlp_coefficients_wide(const FLAC__int32 * flac_restrict data, uint32_t data_len, const FLAC__int32 * flac_restrict qlp_coeff, uint32_t order, int lp_quantization, FLAC__int32 * flac_restrict residual)
 #if defined(FLAC__OVERFLOW_DETECT) || !defined(FLAC__LPC_UNROLLED_FILTER_LOOPS)
 {
-	unsigned i, j;
+	uint32_t i, j;
 	FLAC__int64 sum;
 	const FLAC__int32 *history;
 
@@ -546,11 +599,7 @@
 		history = data;
 		for(j = 0; j < order; j++)
 			sum += (FLAC__int64)qlp_coeff[j] * (FLAC__int64)(*(--history));
-		if(FLAC__bitmath_silog2_wide(sum >> lp_quantization) > 32) {
-			fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients_wide: OVERFLOW, i=%u, sum=%" PRId64 "\n", i, (sum >> lp_quantization));
-			break;
-		}
-		if(FLAC__bitmath_silog2_wide((FLAC__int64)(*data) - (sum >> lp_quantization)) > 32) {
+		if(FLAC__bitmath_silog2((FLAC__int64)(*data) - (sum >> lp_quantization)) > 32) {
 			fprintf(stderr,"FLAC__lpc_compute_residual_from_qlp_coefficients_wide: OVERFLOW, i=%u, data=%d, sum=%" PRId64 ", residual=%" PRId64 "\n", i, *data, (int64_t)(sum >> lp_quantization), ((FLAC__int64)(*data) - (sum >> lp_quantization)));
 			break;
 		}
@@ -588,7 +637,7 @@
 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
+						residual[i] = data[i] - (sum >> lp_quantization);
 					}
 				}
 				else { /* order == 11 */
@@ -605,7 +654,7 @@
 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
+						residual[i] = data[i] - (sum >> lp_quantization);
 					}
 				}
 			}
@@ -623,7 +672,7 @@
 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
+						residual[i] = data[i] - (sum >> lp_quantization);
 					}
 				}
 				else { /* order == 9 */
@@ -638,7 +687,7 @@
 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
+						residual[i] = data[i] - (sum >> lp_quantization);
 					}
 				}
 			}
@@ -656,7 +705,7 @@
 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
+						residual[i] = data[i] - (sum >> lp_quantization);
 					}
 				}
 				else { /* order == 7 */
@@ -669,7 +718,7 @@
 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
+						residual[i] = data[i] - (sum >> lp_quantization);
 					}
 				}
 			}
@@ -683,7 +732,7 @@
 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
+						residual[i] = data[i] - (sum >> lp_quantization);
 					}
 				}
 				else { /* order == 5 */
@@ -694,7 +743,7 @@
 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
+						residual[i] = data[i] - (sum >> lp_quantization);
 					}
 				}
 			}
@@ -708,7 +757,7 @@
 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
+						residual[i] = data[i] - (sum >> lp_quantization);
 					}
 				}
 				else { /* order == 3 */
@@ -717,7 +766,7 @@
 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
+						residual[i] = data[i] - (sum >> lp_quantization);
 					}
 				}
 			}
@@ -727,12 +776,12 @@
 						sum = 0;
 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
+						residual[i] = data[i] - (sum >> lp_quantization);
 					}
 				}
 				else { /* order == 1 */
 					for(i = 0; i < (int)data_len; i++)
-						residual[i] = data[i] - (FLAC__int32)((qlp_coeff[0] * (FLAC__int64)data[i-1]) >> lp_quantization);
+						residual[i] = data[i] - ((qlp_coeff[0] * (FLAC__int64)data[i-1]) >> lp_quantization);
 				}
 			}
 		}
@@ -741,25 +790,25 @@
 		for(i = 0; i < (int)data_len; i++) {
 			sum = 0;
 			switch(order) {
-				case 32: sum += qlp_coeff[31] * (FLAC__int64)data[i-32];
-				case 31: sum += qlp_coeff[30] * (FLAC__int64)data[i-31];
-				case 30: sum += qlp_coeff[29] * (FLAC__int64)data[i-30];
-				case 29: sum += qlp_coeff[28] * (FLAC__int64)data[i-29];
-				case 28: sum += qlp_coeff[27] * (FLAC__int64)data[i-28];
-				case 27: sum += qlp_coeff[26] * (FLAC__int64)data[i-27];
-				case 26: sum += qlp_coeff[25] * (FLAC__int64)data[i-26];
-				case 25: sum += qlp_coeff[24] * (FLAC__int64)data[i-25];
-				case 24: sum += qlp_coeff[23] * (FLAC__int64)data[i-24];
-				case 23: sum += qlp_coeff[22] * (FLAC__int64)data[i-23];
-				case 22: sum += qlp_coeff[21] * (FLAC__int64)data[i-22];
-				case 21: sum += qlp_coeff[20] * (FLAC__int64)data[i-21];
-				case 20: sum += qlp_coeff[19] * (FLAC__int64)data[i-20];
-				case 19: sum += qlp_coeff[18] * (FLAC__int64)data[i-19];
-				case 18: sum += qlp_coeff[17] * (FLAC__int64)data[i-18];
-				case 17: sum += qlp_coeff[16] * (FLAC__int64)data[i-17];
-				case 16: sum += qlp_coeff[15] * (FLAC__int64)data[i-16];
-				case 15: sum += qlp_coeff[14] * (FLAC__int64)data[i-15];
-				case 14: sum += qlp_coeff[13] * (FLAC__int64)data[i-14];
+				case 32: sum += qlp_coeff[31] * (FLAC__int64)data[i-32]; /* Falls through. */
+				case 31: sum += qlp_coeff[30] * (FLAC__int64)data[i-31]; /* Falls through. */
+				case 30: sum += qlp_coeff[29] * (FLAC__int64)data[i-30]; /* Falls through. */
+				case 29: sum += qlp_coeff[28] * (FLAC__int64)data[i-29]; /* Falls through. */
+				case 28: sum += qlp_coeff[27] * (FLAC__int64)data[i-28]; /* Falls through. */
+				case 27: sum += qlp_coeff[26] * (FLAC__int64)data[i-27]; /* Falls through. */
+				case 26: sum += qlp_coeff[25] * (FLAC__int64)data[i-26]; /* Falls through. */
+				case 25: sum += qlp_coeff[24] * (FLAC__int64)data[i-25]; /* Falls through. */
+				case 24: sum += qlp_coeff[23] * (FLAC__int64)data[i-24]; /* Falls through. */
+				case 23: sum += qlp_coeff[22] * (FLAC__int64)data[i-23]; /* Falls through. */
+				case 22: sum += qlp_coeff[21] * (FLAC__int64)data[i-22]; /* Falls through. */
+				case 21: sum += qlp_coeff[20] * (FLAC__int64)data[i-21]; /* Falls through. */
+				case 20: sum += qlp_coeff[19] * (FLAC__int64)data[i-20]; /* Falls through. */
+				case 19: sum += qlp_coeff[18] * (FLAC__int64)data[i-19]; /* Falls through. */
+				case 18: sum += qlp_coeff[17] * (FLAC__int64)data[i-18]; /* Falls through. */
+				case 17: sum += qlp_coeff[16] * (FLAC__int64)data[i-17]; /* Falls through. */
+				case 16: sum += qlp_coeff[15] * (FLAC__int64)data[i-16]; /* Falls through. */
+				case 15: sum += qlp_coeff[14] * (FLAC__int64)data[i-15]; /* Falls through. */
+				case 14: sum += qlp_coeff[13] * (FLAC__int64)data[i-14]; /* Falls through. */
 				case 13: sum += qlp_coeff[12] * (FLAC__int64)data[i-13];
 				         sum += qlp_coeff[11] * (FLAC__int64)data[i-12];
 				         sum += qlp_coeff[10] * (FLAC__int64)data[i-11];
@@ -774,19 +823,160 @@
 				         sum += qlp_coeff[ 1] * (FLAC__int64)data[i- 2];
 				         sum += qlp_coeff[ 0] * (FLAC__int64)data[i- 1];
 			}
-			residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
+			residual[i] = data[i] - (sum >> lp_quantization);
 		}
 	}
 }
 #endif
 
+FLAC__bool FLAC__lpc_compute_residual_from_qlp_coefficients_limit_residual(const FLAC__int32 * flac_restrict data, uint32_t data_len, const FLAC__int32 * flac_restrict qlp_coeff, uint32_t order, int lp_quantization, FLAC__int32 * flac_restrict residual)
+{
+	int i;
+	FLAC__int64 sum, residual_to_check;
+	/* Writes residual[i] = data[i] - (prediction >> lp_quantization) for data_len samples; returns false at the first residual failing the range check below (earlier entries stay written), true otherwise. */
+	FLAC__ASSERT(order > 0);
+	FLAC__ASSERT(order <= 32);
+	/* The prediction for sample i reads data[i-1] .. data[i-order], so for small i it indexes before data[0]. */
+	for(i = 0; i < (int)data_len; i++) {
+		sum = 0;
+		switch(order) { /* entered at case `order`; every case falls through down to case 1 */
+			case 32: sum += qlp_coeff[31] * (FLAC__int64)data[i-32]; /* Falls through. */
+			case 31: sum += qlp_coeff[30] * (FLAC__int64)data[i-31]; /* Falls through. */
+			case 30: sum += qlp_coeff[29] * (FLAC__int64)data[i-30]; /* Falls through. */
+			case 29: sum += qlp_coeff[28] * (FLAC__int64)data[i-29]; /* Falls through. */
+			case 28: sum += qlp_coeff[27] * (FLAC__int64)data[i-28]; /* Falls through. */
+			case 27: sum += qlp_coeff[26] * (FLAC__int64)data[i-27]; /* Falls through. */
+			case 26: sum += qlp_coeff[25] * (FLAC__int64)data[i-26]; /* Falls through. */
+			case 25: sum += qlp_coeff[24] * (FLAC__int64)data[i-25]; /* Falls through. */
+			case 24: sum += qlp_coeff[23] * (FLAC__int64)data[i-24]; /* Falls through. */
+			case 23: sum += qlp_coeff[22] * (FLAC__int64)data[i-23]; /* Falls through. */
+			case 22: sum += qlp_coeff[21] * (FLAC__int64)data[i-22]; /* Falls through. */
+			case 21: sum += qlp_coeff[20] * (FLAC__int64)data[i-21]; /* Falls through. */
+			case 20: sum += qlp_coeff[19] * (FLAC__int64)data[i-20]; /* Falls through. */
+			case 19: sum += qlp_coeff[18] * (FLAC__int64)data[i-19]; /* Falls through. */
+			case 18: sum += qlp_coeff[17] * (FLAC__int64)data[i-18]; /* Falls through. */
+			case 17: sum += qlp_coeff[16] * (FLAC__int64)data[i-17]; /* Falls through. */
+			case 16: sum += qlp_coeff[15] * (FLAC__int64)data[i-16]; /* Falls through. */
+			case 15: sum += qlp_coeff[14] * (FLAC__int64)data[i-15]; /* Falls through. */
+			case 14: sum += qlp_coeff[13] * (FLAC__int64)data[i-14]; /* Falls through. */
+			case 13: sum += qlp_coeff[12] * (FLAC__int64)data[i-13]; /* Falls through. */
+			case 12: sum += qlp_coeff[11] * (FLAC__int64)data[i-12]; /* Falls through. */
+			case 11: sum += qlp_coeff[10] * (FLAC__int64)data[i-11]; /* Falls through. */
+			case 10: sum += qlp_coeff[ 9] * (FLAC__int64)data[i-10]; /* Falls through. */
+			case  9: sum += qlp_coeff[ 8] * (FLAC__int64)data[i- 9]; /* Falls through. */
+			case  8: sum += qlp_coeff[ 7] * (FLAC__int64)data[i- 8]; /* Falls through. */
+			case  7: sum += qlp_coeff[ 6] * (FLAC__int64)data[i- 7]; /* Falls through. */
+			case  6: sum += qlp_coeff[ 5] * (FLAC__int64)data[i- 6]; /* Falls through. */
+			case  5: sum += qlp_coeff[ 4] * (FLAC__int64)data[i- 5]; /* Falls through. */
+			case  4: sum += qlp_coeff[ 3] * (FLAC__int64)data[i- 4]; /* Falls through. */
+			case  3: sum += qlp_coeff[ 2] * (FLAC__int64)data[i- 3]; /* Falls through. */
+			case  2: sum += qlp_coeff[ 1] * (FLAC__int64)data[i- 2]; /* Falls through. */
+			case  1: sum += qlp_coeff[ 0] * (FLAC__int64)data[i- 1];
+		}
+		residual_to_check = data[i] - (sum >> lp_quantization);
+		/* residual must not be INT32_MIN because abs(INT32_MIN) is undefined, hence `<=` rather than `<` on the lower bound */
+		if(residual_to_check <= INT32_MIN || residual_to_check > INT32_MAX)
+			return false;
+		else
+			residual[i] = residual_to_check;
+	}
+	return true;
+}
+
+FLAC__bool FLAC__lpc_compute_residual_from_qlp_coefficients_limit_residual_33bit(const FLAC__int64 * flac_restrict data, uint32_t data_len, const FLAC__int32 * flac_restrict qlp_coeff, uint32_t order, int lp_quantization, FLAC__int32 * flac_restrict residual)
+{
+	int i;
+	FLAC__int64 sum, residual_to_check;
+	/* Same computation as the non-33bit limit_residual routine, but data is FLAC__int64, so the products need no widening cast; returns false at the first residual failing the range check below, true otherwise. */
+	FLAC__ASSERT(order > 0);
+	FLAC__ASSERT(order <= 32);
+	/* The prediction for sample i reads data[i-1] .. data[i-order], so for small i it indexes before data[0]. */
+	for(i = 0; i < (int)data_len; i++) {
+		sum = 0;
+		switch(order) { /* entered at case `order`; every case falls through down to case 1 */
+			case 32: sum += qlp_coeff[31] * data[i-32]; /* Falls through. */
+			case 31: sum += qlp_coeff[30] * data[i-31]; /* Falls through. */
+			case 30: sum += qlp_coeff[29] * data[i-30]; /* Falls through. */
+			case 29: sum += qlp_coeff[28] * data[i-29]; /* Falls through. */
+			case 28: sum += qlp_coeff[27] * data[i-28]; /* Falls through. */
+			case 27: sum += qlp_coeff[26] * data[i-27]; /* Falls through. */
+			case 26: sum += qlp_coeff[25] * data[i-26]; /* Falls through. */
+			case 25: sum += qlp_coeff[24] * data[i-25]; /* Falls through. */
+			case 24: sum += qlp_coeff[23] * data[i-24]; /* Falls through. */
+			case 23: sum += qlp_coeff[22] * data[i-23]; /* Falls through. */
+			case 22: sum += qlp_coeff[21] * data[i-22]; /* Falls through. */
+			case 21: sum += qlp_coeff[20] * data[i-21]; /* Falls through. */
+			case 20: sum += qlp_coeff[19] * data[i-20]; /* Falls through. */
+			case 19: sum += qlp_coeff[18] * data[i-19]; /* Falls through. */
+			case 18: sum += qlp_coeff[17] * data[i-18]; /* Falls through. */
+			case 17: sum += qlp_coeff[16] * data[i-17]; /* Falls through. */
+			case 16: sum += qlp_coeff[15] * data[i-16]; /* Falls through. */
+			case 15: sum += qlp_coeff[14] * data[i-15]; /* Falls through. */
+			case 14: sum += qlp_coeff[13] * data[i-14]; /* Falls through. */
+			case 13: sum += qlp_coeff[12] * data[i-13]; /* Falls through. */
+			case 12: sum += qlp_coeff[11] * data[i-12]; /* Falls through. */
+			case 11: sum += qlp_coeff[10] * data[i-11]; /* Falls through. */
+			case 10: sum += qlp_coeff[ 9] * data[i-10]; /* Falls through. */
+			case  9: sum += qlp_coeff[ 8] * data[i- 9]; /* Falls through. */
+			case  8: sum += qlp_coeff[ 7] * data[i- 8]; /* Falls through. */
+			case  7: sum += qlp_coeff[ 6] * data[i- 7]; /* Falls through. */
+			case  6: sum += qlp_coeff[ 5] * data[i- 6]; /* Falls through. */
+			case  5: sum += qlp_coeff[ 4] * data[i- 5]; /* Falls through. */
+			case  4: sum += qlp_coeff[ 3] * data[i- 4]; /* Falls through. */
+			case  3: sum += qlp_coeff[ 2] * data[i- 3]; /* Falls through. */
+			case  2: sum += qlp_coeff[ 1] * data[i- 2]; /* Falls through. */
+			case  1: sum += qlp_coeff[ 0] * data[i- 1];
+		}
+		residual_to_check = data[i] - (sum >> lp_quantization);
+		/* residual must not be INT32_MIN because abs(INT32_MIN) is undefined, hence `<=` rather than `<` on the lower bound */
+		if(residual_to_check <= INT32_MIN || residual_to_check > INT32_MAX)
+			return false;
+		else
+			residual[i] = residual_to_check;
+	}
+	return true;
+}
+
 #endif /* !defined FLAC__INTEGER_ONLY_LIBRARY */
 
-void FLAC__lpc_restore_signal(const FLAC__int32 * flac_restrict residual, unsigned data_len, const FLAC__int32 * flac_restrict qlp_coeff, unsigned order, int lp_quantization, FLAC__int32 * flac_restrict data)
+uint32_t FLAC__lpc_max_prediction_before_shift_bps(uint32_t subframe_bps, const FLAC__int32 * flac_restrict qlp_coeff, uint32_t order)
+{
+	/* Returns subframe_bps plus FLAC__bitmath_silog2() of the summed absolute values of
+	 * the first `order` coefficients. This used to be subframe_bps + qlp_coeff_precision +
+	 * FLAC__bitmath_ilog2(order), but that treats both the samples and the predictor as
+	 * unknown; the predictor is known here, so its actual coefficients give a tighter bound */
+	FLAC__int32 abs_sum_of_qlp_coeff = 0; /* NOTE(review): 32-bit accumulator; assumes coefficient magnitudes are small enough not to overflow it - confirm */
+	uint32_t i;
+	for(i = 0; i < order; i++)
+		abs_sum_of_qlp_coeff += abs(qlp_coeff[i]);
+	if(abs_sum_of_qlp_coeff == 0)
+		abs_sum_of_qlp_coeff = 1; /* all-zero predictor: pass 1 rather than 0 to FLAC__bitmath_silog2 */
+	return subframe_bps + FLAC__bitmath_silog2(abs_sum_of_qlp_coeff);
+}
+
+
+uint32_t FLAC__lpc_max_residual_bps(uint32_t subframe_bps, const FLAC__int32 * flac_restrict qlp_coeff, uint32_t order, int lp_quantization)
+{ /* Returns one more than the larger of subframe_bps and the prediction width after the lp_quantization shift. */
+	FLAC__int32 predictor_sum_bps = FLAC__lpc_max_prediction_before_shift_bps(subframe_bps, qlp_coeff, order) - lp_quantization; /* pre-shift width minus the shift amount */
+	if((int)subframe_bps > predictor_sum_bps) /* compared as signed values */
+		return subframe_bps + 1;
+	else
+		return predictor_sum_bps + 1;
+}
+
+#ifdef FUZZING_BUILD_MODE_NO_SANITIZE_SIGNED_INTEGER_OVERFLOW
+/* The attribute below silences the signed-integer-overflow check of the
+ * undefined behavior sanitizer used by oss-fuzz. Because fuzzing feeds
+ * bogus predictors and residual samples to the decoder, overflows in this
+ * section are unavoidable. Also, because the calculated values are audio
+ * path only, there is no potential for security problems */
+__attribute__((no_sanitize("signed-integer-overflow")))
+#endif
+void FLAC__lpc_restore_signal(const FLAC__int32 * flac_restrict residual, uint32_t data_len, const FLAC__int32 * flac_restrict qlp_coeff, uint32_t order, int lp_quantization, FLAC__int32 * flac_restrict data)
 #if defined(FLAC__OVERFLOW_DETECT) || !defined(FLAC__LPC_UNROLLED_FILTER_LOOPS)
 {
 	FLAC__int64 sumo;
-	unsigned i, j;
+	uint32_t i, j;
 	FLAC__int32 sum;
 	const FLAC__int32 *r = residual, *history;
 
@@ -805,8 +995,10 @@
 		for(j = 0; j < order; j++) {
 			sum += qlp_coeff[j] * (*(--history));
 			sumo += (FLAC__int64)qlp_coeff[j] * (FLAC__int64)(*history);
+#ifdef FLAC__OVERFLOW_DETECT
 			if(sumo > 2147483647ll || sumo < -2147483648ll)
 				fprintf(stderr,"FLAC__lpc_restore_signal: OVERFLOW, i=%u, j=%u, c=%d, d=%d, sumo=%" PRId64 "\n",i,j,qlp_coeff[j],*history,sumo);
+#endif
 		}
 		*(data++) = *(r++) + (sum >> lp_quantization);
 	}
@@ -1004,25 +1196,25 @@
 		for(i = 0; i < (int)data_len; i++) {
 			sum = 0;
 			switch(order) {
-				case 32: sum += qlp_coeff[31] * data[i-32];
-				case 31: sum += qlp_coeff[30] * data[i-31];
-				case 30: sum += qlp_coeff[29] * data[i-30];
-				case 29: sum += qlp_coeff[28] * data[i-29];
-				case 28: sum += qlp_coeff[27] * data[i-28];
-				case 27: sum += qlp_coeff[26] * data[i-27];
-				case 26: sum += qlp_coeff[25] * data[i-26];
-				case 25: sum += qlp_coeff[24] * data[i-25];
-				case 24: sum += qlp_coeff[23] * data[i-24];
-				case 23: sum += qlp_coeff[22] * data[i-23];
-				case 22: sum += qlp_coeff[21] * data[i-22];
-				case 21: sum += qlp_coeff[20] * data[i-21];
-				case 20: sum += qlp_coeff[19] * data[i-20];
-				case 19: sum += qlp_coeff[18] * data[i-19];
-				case 18: sum += qlp_coeff[17] * data[i-18];
-				case 17: sum += qlp_coeff[16] * data[i-17];
-				case 16: sum += qlp_coeff[15] * data[i-16];
-				case 15: sum += qlp_coeff[14] * data[i-15];
-				case 14: sum += qlp_coeff[13] * data[i-14];
+				case 32: sum += qlp_coeff[31] * data[i-32]; /* Falls through. */
+				case 31: sum += qlp_coeff[30] * data[i-31]; /* Falls through. */
+				case 30: sum += qlp_coeff[29] * data[i-30]; /* Falls through. */
+				case 29: sum += qlp_coeff[28] * data[i-29]; /* Falls through. */
+				case 28: sum += qlp_coeff[27] * data[i-28]; /* Falls through. */
+				case 27: sum += qlp_coeff[26] * data[i-27]; /* Falls through. */
+				case 26: sum += qlp_coeff[25] * data[i-26]; /* Falls through. */
+				case 25: sum += qlp_coeff[24] * data[i-25]; /* Falls through. */
+				case 24: sum += qlp_coeff[23] * data[i-24]; /* Falls through. */
+				case 23: sum += qlp_coeff[22] * data[i-23]; /* Falls through. */
+				case 22: sum += qlp_coeff[21] * data[i-22]; /* Falls through. */
+				case 21: sum += qlp_coeff[20] * data[i-21]; /* Falls through. */
+				case 20: sum += qlp_coeff[19] * data[i-20]; /* Falls through. */
+				case 19: sum += qlp_coeff[18] * data[i-19]; /* Falls through. */
+				case 18: sum += qlp_coeff[17] * data[i-18]; /* Falls through. */
+				case 17: sum += qlp_coeff[16] * data[i-17]; /* Falls through. */
+				case 16: sum += qlp_coeff[15] * data[i-16]; /* Falls through. */
+				case 15: sum += qlp_coeff[14] * data[i-15]; /* Falls through. */
+				case 14: sum += qlp_coeff[13] * data[i-14]; /* Falls through. */
 				case 13: sum += qlp_coeff[12] * data[i-13];
 				         sum += qlp_coeff[11] * data[i-12];
 				         sum += qlp_coeff[10] * data[i-11];
@@ -1043,10 +1235,10 @@
 }
 #endif
 
-void FLAC__lpc_restore_signal_wide(const FLAC__int32 * flac_restrict residual, unsigned data_len, const FLAC__int32 * flac_restrict qlp_coeff, unsigned order, int lp_quantization, FLAC__int32 * flac_restrict data)
+void FLAC__lpc_restore_signal_wide(const FLAC__int32 * flac_restrict residual, uint32_t data_len, const FLAC__int32 * flac_restrict qlp_coeff, uint32_t order, int lp_quantization, FLAC__int32 * flac_restrict data)
 #if defined(FLAC__OVERFLOW_DETECT) || !defined(FLAC__LPC_UNROLLED_FILTER_LOOPS)
 {
-	unsigned i, j;
+	uint32_t i, j;
 	FLAC__int64 sum;
 	const FLAC__int32 *r = residual, *history;
 
@@ -1063,15 +1255,13 @@
 		history = data;
 		for(j = 0; j < order; j++)
 			sum += (FLAC__int64)qlp_coeff[j] * (FLAC__int64)(*(--history));
-		if(FLAC__bitmath_silog2_wide(sum >> lp_quantization) > 32) {
-			fprintf(stderr,"FLAC__lpc_restore_signal_wide: OVERFLOW, i=%u, sum=%" PRId64 "\n", i, (sum >> lp_quantization));
-			break;
-		}
-		if(FLAC__bitmath_silog2_wide((FLAC__int64)(*r) + (sum >> lp_quantization)) > 32) {
+#ifdef FLAC__OVERFLOW_DETECT
+		if(FLAC__bitmath_silog2((FLAC__int64)(*r) + (sum >> lp_quantization)) > 32) {
 			fprintf(stderr,"FLAC__lpc_restore_signal_wide: OVERFLOW, i=%u, residual=%d, sum=%" PRId64 ", data=%" PRId64 "\n", i, *r, (sum >> lp_quantization), ((FLAC__int64)(*r) + (sum >> lp_quantization)));
 			break;
 		}
-		*(data++) = *(r++) + (FLAC__int32)(sum >> lp_quantization);
+#endif
+		*(data++) = (FLAC__int32)(*(r++) + (sum >> lp_quantization));
 	}
 }
 #else /* fully unrolled version for normal use */
@@ -1105,7 +1295,7 @@
 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
+						data[i] = (FLAC__int32) (residual[i] + (sum >> lp_quantization));
 					}
 				}
 				else { /* order == 11 */
@@ -1122,7 +1312,7 @@
 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
+						data[i] = (FLAC__int32) (residual[i] + (sum >> lp_quantization));
 					}
 				}
 			}
@@ -1140,7 +1330,7 @@
 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
+						data[i] = (FLAC__int32) (residual[i] + (sum >> lp_quantization));
 					}
 				}
 				else { /* order == 9 */
@@ -1155,7 +1345,7 @@
 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
+						data[i] = (FLAC__int32) (residual[i] + (sum >> lp_quantization));
 					}
 				}
 			}
@@ -1173,7 +1363,7 @@
 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
+						data[i] = (FLAC__int32) (residual[i] + (sum >> lp_quantization));
 					}
 				}
 				else { /* order == 7 */
@@ -1186,7 +1376,7 @@
 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
+						data[i] = (FLAC__int32) (residual[i] + (sum >> lp_quantization));
 					}
 				}
 			}
@@ -1200,7 +1390,7 @@
 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
+						data[i] = (FLAC__int32) (residual[i] + (sum >> lp_quantization));
 					}
 				}
 				else { /* order == 5 */
@@ -1211,7 +1401,7 @@
 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
+						data[i] = (FLAC__int32) (residual[i] + (sum >> lp_quantization));
 					}
 				}
 			}
@@ -1225,7 +1415,7 @@
 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
+						data[i] = (FLAC__int32) (residual[i] + (sum >> lp_quantization));
 					}
 				}
 				else { /* order == 3 */
@@ -1234,7 +1424,7 @@
 						sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
+						data[i] = (FLAC__int32) (residual[i] + (sum >> lp_quantization));
 					}
 				}
 			}
@@ -1244,12 +1434,12 @@
 						sum = 0;
 						sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
+						data[i] = (FLAC__int32) (residual[i] + (sum >> lp_quantization));
 					}
 				}
 				else { /* order == 1 */
 					for(i = 0; i < (int)data_len; i++)
-						data[i] = residual[i] + (FLAC__int32)((qlp_coeff[0] * (FLAC__int64)data[i-1]) >> lp_quantization);
+						data[i] = (FLAC__int32)(residual[i] + ((qlp_coeff[0] * (FLAC__int64)data[i-1]) >> lp_quantization));
 				}
 			}
 		}
@@ -1258,25 +1448,25 @@
 		for(i = 0; i < (int)data_len; i++) {
 			sum = 0;
 			switch(order) {
-				case 32: sum += qlp_coeff[31] * (FLAC__int64)data[i-32];
-				case 31: sum += qlp_coeff[30] * (FLAC__int64)data[i-31];
-				case 30: sum += qlp_coeff[29] * (FLAC__int64)data[i-30];
-				case 29: sum += qlp_coeff[28] * (FLAC__int64)data[i-29];
-				case 28: sum += qlp_coeff[27] * (FLAC__int64)data[i-28];
-				case 27: sum += qlp_coeff[26] * (FLAC__int64)data[i-27];
-				case 26: sum += qlp_coeff[25] * (FLAC__int64)data[i-26];
-				case 25: sum += qlp_coeff[24] * (FLAC__int64)data[i-25];
-				case 24: sum += qlp_coeff[23] * (FLAC__int64)data[i-24];
-				case 23: sum += qlp_coeff[22] * (FLAC__int64)data[i-23];
-				case 22: sum += qlp_coeff[21] * (FLAC__int64)data[i-22];
-				case 21: sum += qlp_coeff[20] * (FLAC__int64)data[i-21];
-				case 20: sum += qlp_coeff[19] * (FLAC__int64)data[i-20];
-				case 19: sum += qlp_coeff[18] * (FLAC__int64)data[i-19];
-				case 18: sum += qlp_coeff[17] * (FLAC__int64)data[i-18];
-				case 17: sum += qlp_coeff[16] * (FLAC__int64)data[i-17];
-				case 16: sum += qlp_coeff[15] * (FLAC__int64)data[i-16];
-				case 15: sum += qlp_coeff[14] * (FLAC__int64)data[i-15];
-				case 14: sum += qlp_coeff[13] * (FLAC__int64)data[i-14];
+				case 32: sum += qlp_coeff[31] * (FLAC__int64)data[i-32]; /* Falls through. */
+				case 31: sum += qlp_coeff[30] * (FLAC__int64)data[i-31]; /* Falls through. */
+				case 30: sum += qlp_coeff[29] * (FLAC__int64)data[i-30]; /* Falls through. */
+				case 29: sum += qlp_coeff[28] * (FLAC__int64)data[i-29]; /* Falls through. */
+				case 28: sum += qlp_coeff[27] * (FLAC__int64)data[i-28]; /* Falls through. */
+				case 27: sum += qlp_coeff[26] * (FLAC__int64)data[i-27]; /* Falls through. */
+				case 26: sum += qlp_coeff[25] * (FLAC__int64)data[i-26]; /* Falls through. */
+				case 25: sum += qlp_coeff[24] * (FLAC__int64)data[i-25]; /* Falls through. */
+				case 24: sum += qlp_coeff[23] * (FLAC__int64)data[i-24]; /* Falls through. */
+				case 23: sum += qlp_coeff[22] * (FLAC__int64)data[i-23]; /* Falls through. */
+				case 22: sum += qlp_coeff[21] * (FLAC__int64)data[i-22]; /* Falls through. */
+				case 21: sum += qlp_coeff[20] * (FLAC__int64)data[i-21]; /* Falls through. */
+				case 20: sum += qlp_coeff[19] * (FLAC__int64)data[i-20]; /* Falls through. */
+				case 19: sum += qlp_coeff[18] * (FLAC__int64)data[i-19]; /* Falls through. */
+				case 18: sum += qlp_coeff[17] * (FLAC__int64)data[i-18]; /* Falls through. */
+				case 17: sum += qlp_coeff[16] * (FLAC__int64)data[i-17]; /* Falls through. */
+				case 16: sum += qlp_coeff[15] * (FLAC__int64)data[i-16]; /* Falls through. */
+				case 15: sum += qlp_coeff[14] * (FLAC__int64)data[i-15]; /* Falls through. */
+				case 14: sum += qlp_coeff[13] * (FLAC__int64)data[i-14]; /* Falls through. */
 				case 13: sum += qlp_coeff[12] * (FLAC__int64)data[i-13];
 				         sum += qlp_coeff[11] * (FLAC__int64)data[i-12];
 				         sum += qlp_coeff[10] * (FLAC__int64)data[i-11];
@@ -1291,33 +1481,114 @@
 				         sum += qlp_coeff[ 1] * (FLAC__int64)data[i- 2];
 				         sum += qlp_coeff[ 0] * (FLAC__int64)data[i- 1];
 			}
-			data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
+			data[i] = (FLAC__int32) (residual[i] + (sum >> lp_quantization));
 		}
 	}
 }
 #endif
 
+#ifdef FUZZING_BUILD_MODE_NO_SANITIZE_SIGNED_INTEGER_OVERFLOW
+/* The attribute below silences the signed-integer-overflow check of the
+ * undefined behavior sanitizer used by oss-fuzz. Because fuzzing feeds
+ * bogus predictors and residual samples to the decoder, overflows in this
+ * section are unavoidable. Also, because the calculated values are audio
+ * path only, there is no potential for security problems */
+__attribute__((no_sanitize("signed-integer-overflow")))
+#endif
+void FLAC__lpc_restore_signal_wide_33bit(const FLAC__int32 * flac_restrict residual, uint32_t data_len, const FLAC__int32 * flac_restrict qlp_coeff, uint32_t order, int lp_quantization, FLAC__int64 * flac_restrict data)
+#if defined(FLAC__OVERFLOW_DETECT) || !defined(FLAC__LPC_UNROLLED_FILTER_LOOPS)
+{
+	uint32_t i, j;
+	FLAC__int64 sum;
+	const FLAC__int32 *r = residual;
+	const FLAC__int64 *history;
+	/* Generic variant: rebuilds each 64-bit data sample as residual + (prediction >> lp_quantization), walking `history` backwards from the sample about to be written. */
+	FLAC__ASSERT(order > 0);
+	/* NOTE(review): the OVERFLOW message below is tagged "FLAC__lpc_restore_signal_33bit", which is not this function's name (..._wide_33bit) - confirm whether that is intended. */
+	for(i = 0; i < data_len; i++) {
+		sum = 0;
+		history = data;
+		for(j = 0; j < order; j++)
+			sum += (FLAC__int64)qlp_coeff[j] * (FLAC__int64)(*(--history));
+#ifdef FLAC__OVERFLOW_DETECT
+		if(FLAC__bitmath_silog2((FLAC__int64)(*r) + (sum >> lp_quantization)) > 33) {
+			fprintf(stderr,"FLAC__lpc_restore_signal_33bit: OVERFLOW, i=%u, residual=%d, sum=%" PRId64 ", data=%" PRId64 "\n", i, *r, (sum >> lp_quantization), ((FLAC__int64)(*r) + (sum >> lp_quantization)));
+			break; /* stops restoring; the remaining samples are left unwritten */
+		}
+#endif
+		*(data++) = (FLAC__int64)(*(r++)) + (sum >> lp_quantization);
+	}
+}
+#else /* unrolled version for normal use */
+{
+	int i;
+	FLAC__int64 sum;
+	/* Unrolled variant: same reconstruction, with the coefficient loop replaced by a fall-through switch on `order`. */
+	FLAC__ASSERT(order > 0);
+	FLAC__ASSERT(order <= 32);
+	/* data[i-1] .. data[i-order] are read, so for small i samples before data[0] are accessed. */
+	for(i = 0; i < (int)data_len; i++) {
+		sum = 0;
+		switch(order) { /* entered at case `order`; every case falls through down to case 1 */
+			case 32: sum += qlp_coeff[31] * data[i-32]; /* Falls through. */
+			case 31: sum += qlp_coeff[30] * data[i-31]; /* Falls through. */
+			case 30: sum += qlp_coeff[29] * data[i-30]; /* Falls through. */
+			case 29: sum += qlp_coeff[28] * data[i-29]; /* Falls through. */
+			case 28: sum += qlp_coeff[27] * data[i-28]; /* Falls through. */
+			case 27: sum += qlp_coeff[26] * data[i-27]; /* Falls through. */
+			case 26: sum += qlp_coeff[25] * data[i-26]; /* Falls through. */
+			case 25: sum += qlp_coeff[24] * data[i-25]; /* Falls through. */
+			case 24: sum += qlp_coeff[23] * data[i-24]; /* Falls through. */
+			case 23: sum += qlp_coeff[22] * data[i-23]; /* Falls through. */
+			case 22: sum += qlp_coeff[21] * data[i-22]; /* Falls through. */
+			case 21: sum += qlp_coeff[20] * data[i-21]; /* Falls through. */
+			case 20: sum += qlp_coeff[19] * data[i-20]; /* Falls through. */
+			case 19: sum += qlp_coeff[18] * data[i-19]; /* Falls through. */
+			case 18: sum += qlp_coeff[17] * data[i-18]; /* Falls through. */
+			case 17: sum += qlp_coeff[16] * data[i-17]; /* Falls through. */
+			case 16: sum += qlp_coeff[15] * data[i-16]; /* Falls through. */
+			case 15: sum += qlp_coeff[14] * data[i-15]; /* Falls through. */
+			case 14: sum += qlp_coeff[13] * data[i-14]; /* Falls through. */
+			case 13: sum += qlp_coeff[12] * data[i-13]; /* Falls through. */
+			case 12: sum += qlp_coeff[11] * data[i-12]; /* Falls through. */
+			case 11: sum += qlp_coeff[10] * data[i-11]; /* Falls through. */
+			case 10: sum += qlp_coeff[ 9] * data[i-10]; /* Falls through. */
+			case  9: sum += qlp_coeff[ 8] * data[i- 9]; /* Falls through. */
+			case  8: sum += qlp_coeff[ 7] * data[i- 8]; /* Falls through. */
+			case  7: sum += qlp_coeff[ 6] * data[i- 7]; /* Falls through. */
+			case  6: sum += qlp_coeff[ 5] * data[i- 6]; /* Falls through. */
+			case  5: sum += qlp_coeff[ 4] * data[i- 5]; /* Falls through. */
+			case  4: sum += qlp_coeff[ 3] * data[i- 4]; /* Falls through. */
+			case  3: sum += qlp_coeff[ 2] * data[i- 3]; /* Falls through. */
+			case  2: sum += qlp_coeff[ 1] * data[i- 2]; /* Falls through. */
+			case  1: sum += qlp_coeff[ 0] * data[i- 1];
+		}
+		data[i] = residual[i] + (sum >> lp_quantization);
+	}
+}
+#endif
+
 #if defined(_MSC_VER)
 #pragma warning ( default : 4028 )
 #endif
 
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
 
-FLAC__double FLAC__lpc_compute_expected_bits_per_residual_sample(FLAC__double lpc_error, unsigned total_samples)
+double FLAC__lpc_compute_expected_bits_per_residual_sample(double lpc_error, uint32_t total_samples)
 {
-	FLAC__double error_scale;
+	double error_scale;
 
 	FLAC__ASSERT(total_samples > 0);
 
-	error_scale = 0.5 * M_LN2 * M_LN2 / (FLAC__double)total_samples;
+	error_scale = 0.5 / (double)total_samples;
 
 	return FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scale(lpc_error, error_scale);
 }
 
-FLAC__double FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scale(FLAC__double lpc_error, FLAC__double error_scale)
+double FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scale(double lpc_error, double error_scale)
 {
 	if(lpc_error > 0.0) {
-		FLAC__double bps = (FLAC__double)0.5 * log(error_scale * lpc_error) / M_LN2;
+		double bps = (double)0.5 * log(error_scale * lpc_error) / M_LN2;
 		if(bps >= 0.0)
 			return bps;
 		else
@@ -1331,21 +1602,21 @@
 	}
 }
 
-unsigned FLAC__lpc_compute_best_order(const FLAC__double lpc_error[], unsigned max_order, unsigned total_samples, unsigned overhead_bits_per_order)
+uint32_t FLAC__lpc_compute_best_order(const double lpc_error[], uint32_t max_order, uint32_t total_samples, uint32_t overhead_bits_per_order)
 {
-	unsigned order, indx, best_index; /* 'index' the index into lpc_error; index==order-1 since lpc_error[0] is for order==1, lpc_error[1] is for order==2, etc */
-	FLAC__double bits, best_bits, error_scale;
+	uint32_t order, indx, best_index; /* 'indx' is the index into lpc_error; indx==order-1 since lpc_error[0] is for order==1, lpc_error[1] is for order==2, etc */
+	double bits, best_bits, error_scale;
 
 	FLAC__ASSERT(max_order > 0);
 	FLAC__ASSERT(total_samples > 0);
 
-	error_scale = 0.5 * M_LN2 * M_LN2 / (FLAC__double)total_samples;
+	error_scale = 0.5 / (double)total_samples;
 
 	best_index = 0;
-	best_bits = (unsigned)(-1);
+	best_bits = (uint32_t)(-1);
 
 	for(indx = 0, order = 1; indx < max_order; indx++, order++) {
-		bits = FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scale(lpc_error[indx], error_scale) * (FLAC__double)(total_samples - order) + (FLAC__double)(order * overhead_bits_per_order);
+		bits = FLAC__lpc_compute_expected_bits_per_residual_sample_with_error_scale(lpc_error[indx], error_scale) * (double)(total_samples - order) + (double)(order * overhead_bits_per_order);
 		if(bits < best_bits) {
 			best_index = indx;
 			best_bits = bits;

diff --git a/src/libFLAC/lpc_intrin_avx2.c b/src/libFLAC/lpc_intrin_avx2.c
index 8eec85e..48bd7a8 100644
--- a/src/libFLAC/lpc_intrin_avx2.c
+++ b/src/libFLAC/lpc_intrin_avx2.c

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -34,9 +34,11 @@
 #  include <config.h>
 #endif
 
+#include "private/cpu.h"
+
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
 #ifndef FLAC__NO_ASM
-#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
+#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN
 #include "private/lpc.h"
 #ifdef FLAC__AVX2_SUPPORTED
 
@@ -46,11 +48,11 @@
 #include <immintrin.h> /* AVX2 */
 
 FLAC__SSE_TARGET("avx2")
-void FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
+void FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[])
 {
 	int i;
 	FLAC__int32 sum;
-	__m128i cnt = _mm_cvtsi32_si128(lp_quantization);
+	const __m128i cnt = _mm_cvtsi32_si128(lp_quantization);
 
 	FLAC__ASSERT(order > 0);
 	FLAC__ASSERT(order <= 32);
@@ -75,20 +77,20 @@
 
 					for(i = 0; i < (int)data_len-7; i+=8) {
 						__m256i summ, mull;
-						summ = _mm256_madd_epi16(q11, _mm256_loadu_si256((const __m256i*)(data+i-12)));
-						mull = _mm256_madd_epi16(q10, _mm256_loadu_si256((const __m256i*)(data+i-11))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q9,  _mm256_loadu_si256((const __m256i*)(data+i-10))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q8,  _mm256_loadu_si256((const __m256i*)(data+i-9 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q7,  _mm256_loadu_si256((const __m256i*)(data+i-8 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q6,  _mm256_loadu_si256((const __m256i*)(data+i-7 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q5,  _mm256_loadu_si256((const __m256i*)(data+i-6 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q4,  _mm256_loadu_si256((const __m256i*)(data+i-5 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q3,  _mm256_loadu_si256((const __m256i*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q2,  _mm256_loadu_si256((const __m256i*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q1,  _mm256_loadu_si256((const __m256i*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q0,  _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull);
+						summ = _mm256_madd_epi16(q11, _mm256_loadu_si256((const __m256i*)(const void*)(data+i-12)));
+						mull = _mm256_madd_epi16(q10, _mm256_loadu_si256((const __m256i*)(const void*)(data+i-11))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q9,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-10))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q8,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-9 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q7,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-8 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q6,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-7 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q5,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-6 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q4,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-5 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q3,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q2,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q1,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q0,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull);
 						summ = _mm256_sra_epi32(summ, cnt);
-						_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ));
+						_mm256_storeu_si256((__m256i*)(void*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(const void*)(data+i)), summ));
 					}
 				}
 				else { /* order == 11 */
@@ -107,19 +109,19 @@
 
 					for(i = 0; i < (int)data_len-7; i+=8) {
 						__m256i summ, mull;
-						summ = _mm256_madd_epi16(q10, _mm256_loadu_si256((const __m256i*)(data+i-11)));
-						mull = _mm256_madd_epi16(q9,  _mm256_loadu_si256((const __m256i*)(data+i-10))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q8,  _mm256_loadu_si256((const __m256i*)(data+i-9 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q7,  _mm256_loadu_si256((const __m256i*)(data+i-8 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q6,  _mm256_loadu_si256((const __m256i*)(data+i-7 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q5,  _mm256_loadu_si256((const __m256i*)(data+i-6 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q4,  _mm256_loadu_si256((const __m256i*)(data+i-5 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q3,  _mm256_loadu_si256((const __m256i*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q2,  _mm256_loadu_si256((const __m256i*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q1,  _mm256_loadu_si256((const __m256i*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q0,  _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull);
+						summ = _mm256_madd_epi16(q10, _mm256_loadu_si256((const __m256i*)(const void*)(data+i-11)));
+						mull = _mm256_madd_epi16(q9,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-10))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q8,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-9 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q7,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-8 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q6,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-7 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q5,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-6 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q4,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-5 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q3,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q2,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q1,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q0,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull);
 						summ = _mm256_sra_epi32(summ, cnt);
-						_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ));
+						_mm256_storeu_si256((__m256i*)(void*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(const void*)(data+i)), summ));
 					}
 				}
 			}
@@ -139,18 +141,18 @@
 
 					for(i = 0; i < (int)data_len-7; i+=8) {
 						__m256i summ, mull;
-						summ = _mm256_madd_epi16(q9,  _mm256_loadu_si256((const __m256i*)(data+i-10)));
-						mull = _mm256_madd_epi16(q8,  _mm256_loadu_si256((const __m256i*)(data+i-9 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q7,  _mm256_loadu_si256((const __m256i*)(data+i-8 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q6,  _mm256_loadu_si256((const __m256i*)(data+i-7 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q5,  _mm256_loadu_si256((const __m256i*)(data+i-6 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q4,  _mm256_loadu_si256((const __m256i*)(data+i-5 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q3,  _mm256_loadu_si256((const __m256i*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q2,  _mm256_loadu_si256((const __m256i*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q1,  _mm256_loadu_si256((const __m256i*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q0,  _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull);
+						summ = _mm256_madd_epi16(q9,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-10)));
+						mull = _mm256_madd_epi16(q8,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-9 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q7,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-8 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q6,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-7 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q5,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-6 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q4,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-5 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q3,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q2,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q1,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q0,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull);
 						summ = _mm256_sra_epi32(summ, cnt);
-						_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ));
+						_mm256_storeu_si256((__m256i*)(void*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(const void*)(data+i)), summ));
 					}
 				}
 				else { /* order == 9 */
@@ -167,17 +169,17 @@
 
 					for(i = 0; i < (int)data_len-7; i+=8) {
 						__m256i summ, mull;
-						summ = _mm256_madd_epi16(q8,  _mm256_loadu_si256((const __m256i*)(data+i-9 )));
-						mull = _mm256_madd_epi16(q7,  _mm256_loadu_si256((const __m256i*)(data+i-8 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q6,  _mm256_loadu_si256((const __m256i*)(data+i-7 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q5,  _mm256_loadu_si256((const __m256i*)(data+i-6 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q4,  _mm256_loadu_si256((const __m256i*)(data+i-5 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q3,  _mm256_loadu_si256((const __m256i*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q2,  _mm256_loadu_si256((const __m256i*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q1,  _mm256_loadu_si256((const __m256i*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q0,  _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull);
+						summ = _mm256_madd_epi16(q8,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-9 )));
+						mull = _mm256_madd_epi16(q7,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-8 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q6,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-7 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q5,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-6 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q4,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-5 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q3,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q2,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q1,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q0,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull);
 						summ = _mm256_sra_epi32(summ, cnt);
-						_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ));
+						_mm256_storeu_si256((__m256i*)(void*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(const void*)(data+i)), summ));
 					}
 				}
 			}
@@ -197,16 +199,16 @@
 
 					for(i = 0; i < (int)data_len-7; i+=8) {
 						__m256i summ, mull;
-						summ = _mm256_madd_epi16(q7,  _mm256_loadu_si256((const __m256i*)(data+i-8 )));
-						mull = _mm256_madd_epi16(q6,  _mm256_loadu_si256((const __m256i*)(data+i-7 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q5,  _mm256_loadu_si256((const __m256i*)(data+i-6 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q4,  _mm256_loadu_si256((const __m256i*)(data+i-5 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q3,  _mm256_loadu_si256((const __m256i*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q2,  _mm256_loadu_si256((const __m256i*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q1,  _mm256_loadu_si256((const __m256i*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q0,  _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull);
+						summ = _mm256_madd_epi16(q7,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-8 )));
+						mull = _mm256_madd_epi16(q6,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-7 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q5,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-6 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q4,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-5 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q3,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q2,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q1,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q0,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull);
 						summ = _mm256_sra_epi32(summ, cnt);
-						_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ));
+						_mm256_storeu_si256((__m256i*)(void*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(const void*)(data+i)), summ));
 					}
 				}
 				else { /* order == 7 */
@@ -221,15 +223,15 @@
 
 					for(i = 0; i < (int)data_len-7; i+=8) {
 						__m256i summ, mull;
-						summ = _mm256_madd_epi16(q6,  _mm256_loadu_si256((const __m256i*)(data+i-7 )));
-						mull = _mm256_madd_epi16(q5,  _mm256_loadu_si256((const __m256i*)(data+i-6 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q4,  _mm256_loadu_si256((const __m256i*)(data+i-5 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q3,  _mm256_loadu_si256((const __m256i*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q2,  _mm256_loadu_si256((const __m256i*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q1,  _mm256_loadu_si256((const __m256i*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q0,  _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull);
+						summ = _mm256_madd_epi16(q6,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-7 )));
+						mull = _mm256_madd_epi16(q5,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-6 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q4,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-5 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q3,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q2,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q1,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q0,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull);
 						summ = _mm256_sra_epi32(summ, cnt);
-						_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ));
+						_mm256_storeu_si256((__m256i*)(void*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(const void*)(data+i)), summ));
 					}
 				}
 			}
@@ -245,14 +247,14 @@
 
 					for(i = 0; i < (int)data_len-7; i+=8) {
 						__m256i summ, mull;
-						summ = _mm256_madd_epi16(q5,  _mm256_loadu_si256((const __m256i*)(data+i-6 )));
-						mull = _mm256_madd_epi16(q4,  _mm256_loadu_si256((const __m256i*)(data+i-5 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q3,  _mm256_loadu_si256((const __m256i*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q2,  _mm256_loadu_si256((const __m256i*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q1,  _mm256_loadu_si256((const __m256i*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q0,  _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull);
+						summ = _mm256_madd_epi16(q5,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-6 )));
+						mull = _mm256_madd_epi16(q4,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-5 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q3,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q2,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q1,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q0,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull);
 						summ = _mm256_sra_epi32(summ, cnt);
-						_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ));
+						_mm256_storeu_si256((__m256i*)(void*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(const void*)(data+i)), summ));
 					}
 				}
 				else { /* order == 5 */
@@ -265,13 +267,13 @@
 
 					for(i = 0; i < (int)data_len-7; i+=8) {
 						__m256i summ, mull;
-						summ = _mm256_madd_epi16(q4,  _mm256_loadu_si256((const __m256i*)(data+i-5 )));
-						mull = _mm256_madd_epi16(q3,  _mm256_loadu_si256((const __m256i*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q2,  _mm256_loadu_si256((const __m256i*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q1,  _mm256_loadu_si256((const __m256i*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q0,  _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull);
+						summ = _mm256_madd_epi16(q4,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-5 )));
+						mull = _mm256_madd_epi16(q3,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-4 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q2,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q1,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q0,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull);
 						summ = _mm256_sra_epi32(summ, cnt);
-						_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ));
+						_mm256_storeu_si256((__m256i*)(void*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(const void*)(data+i)), summ));
 					}
 				}
 			}
@@ -287,12 +289,12 @@
 
 					for(i = 0; i < (int)data_len-7; i+=8) {
 						__m256i summ, mull;
-						summ = _mm256_madd_epi16(q3,  _mm256_loadu_si256((const __m256i*)(data+i-4 )));
-						mull = _mm256_madd_epi16(q2,  _mm256_loadu_si256((const __m256i*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q1,  _mm256_loadu_si256((const __m256i*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q0,  _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull);
+						summ = _mm256_madd_epi16(q3,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-4 )));
+						mull = _mm256_madd_epi16(q2,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-3 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q1,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q0,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull);
 						summ = _mm256_sra_epi32(summ, cnt);
-						_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ));
+						_mm256_storeu_si256((__m256i*)(void*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(const void*)(data+i)), summ));
 					}
 				}
 				else { /* order == 3 */
@@ -303,11 +305,11 @@
 
 					for(i = 0; i < (int)data_len-7; i+=8) {
 						__m256i summ, mull;
-						summ = _mm256_madd_epi16(q2,  _mm256_loadu_si256((const __m256i*)(data+i-3 )));
-						mull = _mm256_madd_epi16(q1,  _mm256_loadu_si256((const __m256i*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_madd_epi16(q0,  _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull);
+						summ = _mm256_madd_epi16(q2,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-3 )));
+						mull = _mm256_madd_epi16(q1,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-2 ))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_madd_epi16(q0,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull);
 						summ = _mm256_sra_epi32(summ, cnt);
-						_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ));
+						_mm256_storeu_si256((__m256i*)(void*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(const void*)(data+i)), summ));
 					}
 				}
 			}
@@ -319,10 +321,10 @@
 
 					for(i = 0; i < (int)data_len-7; i+=8) {
 						__m256i summ, mull;
-						summ = _mm256_madd_epi16(q1,  _mm256_loadu_si256((const __m256i*)(data+i-2 )));
-						mull = _mm256_madd_epi16(q0,  _mm256_loadu_si256((const __m256i*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull);
+						summ = _mm256_madd_epi16(q1,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-2 )));
+						mull = _mm256_madd_epi16(q0,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-1 ))); summ = _mm256_add_epi32(summ, mull);
 						summ = _mm256_sra_epi32(summ, cnt);
-						_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ));
+						_mm256_storeu_si256((__m256i*)(void*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(const void*)(data+i)), summ));
 					}
 				}
 				else { /* order == 1 */
@@ -331,9 +333,9 @@
 
 					for(i = 0; i < (int)data_len-7; i+=8) {
 						__m256i summ;
-						summ = _mm256_madd_epi16(q0,  _mm256_loadu_si256((const __m256i*)(data+i-1 )));
+						summ = _mm256_madd_epi16(q0,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-1 )));
 						summ = _mm256_sra_epi32(summ, cnt);
-						_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ));
+						_mm256_storeu_si256((__m256i*)(void*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(const void*)(data+i)), summ));
 					}
 				}
 			}
@@ -341,17 +343,17 @@
 		for(; i < (int)data_len; i++) {
 			sum = 0;
 			switch(order) {
-				case 12: sum += qlp_coeff[11] * data[i-12];
-				case 11: sum += qlp_coeff[10] * data[i-11];
-				case 10: sum += qlp_coeff[ 9] * data[i-10];
-				case 9:  sum += qlp_coeff[ 8] * data[i- 9];
-				case 8:  sum += qlp_coeff[ 7] * data[i- 8];
-				case 7:  sum += qlp_coeff[ 6] * data[i- 7];
-				case 6:  sum += qlp_coeff[ 5] * data[i- 6];
-				case 5:  sum += qlp_coeff[ 4] * data[i- 5];
-				case 4:  sum += qlp_coeff[ 3] * data[i- 4];
-				case 3:  sum += qlp_coeff[ 2] * data[i- 3];
-				case 2:  sum += qlp_coeff[ 1] * data[i- 2];
+				case 12: sum += qlp_coeff[11] * data[i-12]; /* Falls through. */
+				case 11: sum += qlp_coeff[10] * data[i-11]; /* Falls through. */
+				case 10: sum += qlp_coeff[ 9] * data[i-10]; /* Falls through. */
+				case 9:  sum += qlp_coeff[ 8] * data[i- 9]; /* Falls through. */
+				case 8:  sum += qlp_coeff[ 7] * data[i- 8]; /* Falls through. */
+				case 7:  sum += qlp_coeff[ 6] * data[i- 7]; /* Falls through. */
+				case 6:  sum += qlp_coeff[ 5] * data[i- 6]; /* Falls through. */
+				case 5:  sum += qlp_coeff[ 4] * data[i- 5]; /* Falls through. */
+				case 4:  sum += qlp_coeff[ 3] * data[i- 4]; /* Falls through. */
+				case 3:  sum += qlp_coeff[ 2] * data[i- 3]; /* Falls through. */
+				case 2:  sum += qlp_coeff[ 1] * data[i- 2]; /* Falls through. */
 				case 1:  sum += qlp_coeff[ 0] * data[i- 1];
 			}
 			residual[i] = data[i] - (sum >> lp_quantization);
@@ -361,25 +363,25 @@
 		for(i = 0; i < (int)data_len; i++) {
 			sum = 0;
 			switch(order) {
-				case 32: sum += qlp_coeff[31] * data[i-32];
-				case 31: sum += qlp_coeff[30] * data[i-31];
-				case 30: sum += qlp_coeff[29] * data[i-30];
-				case 29: sum += qlp_coeff[28] * data[i-29];
-				case 28: sum += qlp_coeff[27] * data[i-28];
-				case 27: sum += qlp_coeff[26] * data[i-27];
-				case 26: sum += qlp_coeff[25] * data[i-26];
-				case 25: sum += qlp_coeff[24] * data[i-25];
-				case 24: sum += qlp_coeff[23] * data[i-24];
-				case 23: sum += qlp_coeff[22] * data[i-23];
-				case 22: sum += qlp_coeff[21] * data[i-22];
-				case 21: sum += qlp_coeff[20] * data[i-21];
-				case 20: sum += qlp_coeff[19] * data[i-20];
-				case 19: sum += qlp_coeff[18] * data[i-19];
-				case 18: sum += qlp_coeff[17] * data[i-18];
-				case 17: sum += qlp_coeff[16] * data[i-17];
-				case 16: sum += qlp_coeff[15] * data[i-16];
-				case 15: sum += qlp_coeff[14] * data[i-15];
-				case 14: sum += qlp_coeff[13] * data[i-14];
+				case 32: sum += qlp_coeff[31] * data[i-32]; /* Falls through. */
+				case 31: sum += qlp_coeff[30] * data[i-31]; /* Falls through. */
+				case 30: sum += qlp_coeff[29] * data[i-30]; /* Falls through. */
+				case 29: sum += qlp_coeff[28] * data[i-29]; /* Falls through. */
+				case 28: sum += qlp_coeff[27] * data[i-28]; /* Falls through. */
+				case 27: sum += qlp_coeff[26] * data[i-27]; /* Falls through. */
+				case 26: sum += qlp_coeff[25] * data[i-26]; /* Falls through. */
+				case 25: sum += qlp_coeff[24] * data[i-25]; /* Falls through. */
+				case 24: sum += qlp_coeff[23] * data[i-24]; /* Falls through. */
+				case 23: sum += qlp_coeff[22] * data[i-23]; /* Falls through. */
+				case 22: sum += qlp_coeff[21] * data[i-22]; /* Falls through. */
+				case 21: sum += qlp_coeff[20] * data[i-21]; /* Falls through. */
+				case 20: sum += qlp_coeff[19] * data[i-20]; /* Falls through. */
+				case 19: sum += qlp_coeff[18] * data[i-19]; /* Falls through. */
+				case 18: sum += qlp_coeff[17] * data[i-18]; /* Falls through. */
+				case 17: sum += qlp_coeff[16] * data[i-17]; /* Falls through. */
+				case 16: sum += qlp_coeff[15] * data[i-16]; /* Falls through. */
+				case 15: sum += qlp_coeff[14] * data[i-15]; /* Falls through. */
+				case 14: sum += qlp_coeff[13] * data[i-14]; /* Falls through. */
 				case 13: sum += qlp_coeff[12] * data[i-13];
 				         sum += qlp_coeff[11] * data[i-12];
 				         sum += qlp_coeff[10] * data[i-11];
@@ -401,11 +403,11 @@
 }
 
 FLAC__SSE_TARGET("avx2")
-void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
+void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[])
 {
 	int i;
 	FLAC__int32 sum;
-	__m128i cnt = _mm_cvtsi32_si128(lp_quantization);
+	const __m128i cnt = _mm_cvtsi32_si128(lp_quantization);
 
 	FLAC__ASSERT(order > 0);
 	FLAC__ASSERT(order <= 32);
@@ -430,20 +432,20 @@
 
 					for(i = 0; i < (int)data_len-7; i+=8) {
 						__m256i summ, mull;
-						summ = _mm256_mullo_epi32(q11, _mm256_loadu_si256((const __m256i*)(data+i-12)));
-						mull = _mm256_mullo_epi32(q10, _mm256_loadu_si256((const __m256i*)(data+i-11))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q9,  _mm256_loadu_si256((const __m256i*)(data+i-10))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q8,  _mm256_loadu_si256((const __m256i*)(data+i-9)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q7,  _mm256_loadu_si256((const __m256i*)(data+i-8)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q6,  _mm256_loadu_si256((const __m256i*)(data+i-7)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q5,  _mm256_loadu_si256((const __m256i*)(data+i-6)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q4,  _mm256_loadu_si256((const __m256i*)(data+i-5)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q3,  _mm256_loadu_si256((const __m256i*)(data+i-4)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q2,  _mm256_loadu_si256((const __m256i*)(data+i-3)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q1,  _mm256_loadu_si256((const __m256i*)(data+i-2)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q0,  _mm256_loadu_si256((const __m256i*)(data+i-1)));  summ = _mm256_add_epi32(summ, mull);
+						summ = _mm256_mullo_epi32(q11, _mm256_loadu_si256((const __m256i*)(const void*)(data+i-12)));
+						mull = _mm256_mullo_epi32(q10, _mm256_loadu_si256((const __m256i*)(const void*)(data+i-11))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q9,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-10))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q8,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-9)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q7,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-8)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q6,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-7)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q5,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-6)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q4,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-5)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q3,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-4)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q2,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-3)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q1,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-2)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q0,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-1)));  summ = _mm256_add_epi32(summ, mull);
 						summ = _mm256_sra_epi32(summ, cnt);
-						_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ));
+						_mm256_storeu_si256((__m256i*)(void*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(const void*)(data+i)), summ));
 					}
 				}
 				else { /* order == 11 */
@@ -462,19 +464,19 @@
 
 					for(i = 0; i < (int)data_len-7; i+=8) {
 						__m256i summ, mull;
-						summ = _mm256_mullo_epi32(q10, _mm256_loadu_si256((const __m256i*)(data+i-11)));
-						mull = _mm256_mullo_epi32(q9,  _mm256_loadu_si256((const __m256i*)(data+i-10))); summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q8,  _mm256_loadu_si256((const __m256i*)(data+i-9)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q7,  _mm256_loadu_si256((const __m256i*)(data+i-8)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q6,  _mm256_loadu_si256((const __m256i*)(data+i-7)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q5,  _mm256_loadu_si256((const __m256i*)(data+i-6)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q4,  _mm256_loadu_si256((const __m256i*)(data+i-5)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q3,  _mm256_loadu_si256((const __m256i*)(data+i-4)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q2,  _mm256_loadu_si256((const __m256i*)(data+i-3)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q1,  _mm256_loadu_si256((const __m256i*)(data+i-2)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q0,  _mm256_loadu_si256((const __m256i*)(data+i-1)));  summ = _mm256_add_epi32(summ, mull);
+						summ = _mm256_mullo_epi32(q10, _mm256_loadu_si256((const __m256i*)(const void*)(data+i-11)));
+						mull = _mm256_mullo_epi32(q9,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-10))); summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q8,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-9)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q7,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-8)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q6,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-7)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q5,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-6)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q4,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-5)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q3,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-4)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q2,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-3)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q1,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-2)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q0,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-1)));  summ = _mm256_add_epi32(summ, mull);
 						summ = _mm256_sra_epi32(summ, cnt);
-						_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ));
+						_mm256_storeu_si256((__m256i*)(void*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(const void*)(data+i)), summ));
 					}
 				}
 			}
@@ -494,18 +496,18 @@
 
 					for(i = 0; i < (int)data_len-7; i+=8) {
 						__m256i summ, mull;
-						summ = _mm256_mullo_epi32(q9,  _mm256_loadu_si256((const __m256i*)(data+i-10)));
-						mull = _mm256_mullo_epi32(q8,  _mm256_loadu_si256((const __m256i*)(data+i-9)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q7,  _mm256_loadu_si256((const __m256i*)(data+i-8)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q6,  _mm256_loadu_si256((const __m256i*)(data+i-7)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q5,  _mm256_loadu_si256((const __m256i*)(data+i-6)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q4,  _mm256_loadu_si256((const __m256i*)(data+i-5)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q3,  _mm256_loadu_si256((const __m256i*)(data+i-4)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q2,  _mm256_loadu_si256((const __m256i*)(data+i-3)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q1,  _mm256_loadu_si256((const __m256i*)(data+i-2)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q0,  _mm256_loadu_si256((const __m256i*)(data+i-1)));  summ = _mm256_add_epi32(summ, mull);
+						summ = _mm256_mullo_epi32(q9,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-10)));
+						mull = _mm256_mullo_epi32(q8,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-9)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q7,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-8)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q6,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-7)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q5,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-6)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q4,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-5)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q3,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-4)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q2,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-3)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q1,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-2)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q0,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-1)));  summ = _mm256_add_epi32(summ, mull);
 						summ = _mm256_sra_epi32(summ, cnt);
-						_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ));
+						_mm256_storeu_si256((__m256i*)(void*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(const void*)(data+i)), summ));
 					}
 				}
 				else { /* order == 9 */
@@ -522,17 +524,17 @@
 
 					for(i = 0; i < (int)data_len-7; i+=8) {
 						__m256i summ, mull;
-						summ = _mm256_mullo_epi32(q8,  _mm256_loadu_si256((const __m256i*)(data+i-9)));
-						mull = _mm256_mullo_epi32(q7,  _mm256_loadu_si256((const __m256i*)(data+i-8)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q6,  _mm256_loadu_si256((const __m256i*)(data+i-7)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q5,  _mm256_loadu_si256((const __m256i*)(data+i-6)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q4,  _mm256_loadu_si256((const __m256i*)(data+i-5)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q3,  _mm256_loadu_si256((const __m256i*)(data+i-4)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q2,  _mm256_loadu_si256((const __m256i*)(data+i-3)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q1,  _mm256_loadu_si256((const __m256i*)(data+i-2)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q0,  _mm256_loadu_si256((const __m256i*)(data+i-1)));  summ = _mm256_add_epi32(summ, mull);
+						summ = _mm256_mullo_epi32(q8,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-9)));
+						mull = _mm256_mullo_epi32(q7,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-8)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q6,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-7)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q5,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-6)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q4,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-5)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q3,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-4)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q2,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-3)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q1,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-2)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q0,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-1)));  summ = _mm256_add_epi32(summ, mull);
 						summ = _mm256_sra_epi32(summ, cnt);
-						_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ));
+						_mm256_storeu_si256((__m256i*)(void*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(const void*)(data+i)), summ));
 					}
 				}
 			}
@@ -552,16 +554,16 @@
 
 					for(i = 0; i < (int)data_len-7; i+=8) {
 						__m256i summ, mull;
-						summ = _mm256_mullo_epi32(q7,  _mm256_loadu_si256((const __m256i*)(data+i-8)));
-						mull = _mm256_mullo_epi32(q6,  _mm256_loadu_si256((const __m256i*)(data+i-7)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q5,  _mm256_loadu_si256((const __m256i*)(data+i-6)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q4,  _mm256_loadu_si256((const __m256i*)(data+i-5)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q3,  _mm256_loadu_si256((const __m256i*)(data+i-4)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q2,  _mm256_loadu_si256((const __m256i*)(data+i-3)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q1,  _mm256_loadu_si256((const __m256i*)(data+i-2)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q0,  _mm256_loadu_si256((const __m256i*)(data+i-1)));  summ = _mm256_add_epi32(summ, mull);
+						summ = _mm256_mullo_epi32(q7,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-8)));
+						mull = _mm256_mullo_epi32(q6,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-7)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q5,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-6)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q4,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-5)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q3,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-4)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q2,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-3)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q1,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-2)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q0,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-1)));  summ = _mm256_add_epi32(summ, mull);
 						summ = _mm256_sra_epi32(summ, cnt);
-						_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ));
+						_mm256_storeu_si256((__m256i*)(void*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(const void*)(data+i)), summ));
 					}
 				}
 				else { /* order == 7 */
@@ -576,15 +578,15 @@
 
 					for(i = 0; i < (int)data_len-7; i+=8) {
 						__m256i summ, mull;
-						summ = _mm256_mullo_epi32(q6,  _mm256_loadu_si256((const __m256i*)(data+i-7)));
-						mull = _mm256_mullo_epi32(q5,  _mm256_loadu_si256((const __m256i*)(data+i-6)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q4,  _mm256_loadu_si256((const __m256i*)(data+i-5)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q3,  _mm256_loadu_si256((const __m256i*)(data+i-4)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q2,  _mm256_loadu_si256((const __m256i*)(data+i-3)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q1,  _mm256_loadu_si256((const __m256i*)(data+i-2)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q0,  _mm256_loadu_si256((const __m256i*)(data+i-1)));  summ = _mm256_add_epi32(summ, mull);
+						summ = _mm256_mullo_epi32(q6,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-7)));
+						mull = _mm256_mullo_epi32(q5,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-6)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q4,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-5)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q3,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-4)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q2,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-3)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q1,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-2)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q0,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-1)));  summ = _mm256_add_epi32(summ, mull);
 						summ = _mm256_sra_epi32(summ, cnt);
-						_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ));
+						_mm256_storeu_si256((__m256i*)(void*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(const void*)(data+i)), summ));
 					}
 				}
 			}
@@ -600,14 +602,14 @@
 
 					for(i = 0; i < (int)data_len-7; i+=8) {
 						__m256i summ, mull;
-						summ = _mm256_mullo_epi32(q5,  _mm256_loadu_si256((const __m256i*)(data+i-6)));
-						mull = _mm256_mullo_epi32(q4,  _mm256_loadu_si256((const __m256i*)(data+i-5)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q3,  _mm256_loadu_si256((const __m256i*)(data+i-4)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q2,  _mm256_loadu_si256((const __m256i*)(data+i-3)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q1,  _mm256_loadu_si256((const __m256i*)(data+i-2)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q0,  _mm256_loadu_si256((const __m256i*)(data+i-1)));  summ = _mm256_add_epi32(summ, mull);
+						summ = _mm256_mullo_epi32(q5,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-6)));
+						mull = _mm256_mullo_epi32(q4,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-5)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q3,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-4)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q2,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-3)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q1,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-2)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q0,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-1)));  summ = _mm256_add_epi32(summ, mull);
 						summ = _mm256_sra_epi32(summ, cnt);
-						_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ));
+						_mm256_storeu_si256((__m256i*)(void*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(const void*)(data+i)), summ));
 					}
 				}
 				else { /* order == 5 */
@@ -620,13 +622,13 @@
 
 					for(i = 0; i < (int)data_len-7; i+=8) {
 						__m256i summ, mull;
-						summ = _mm256_mullo_epi32(q4,  _mm256_loadu_si256((const __m256i*)(data+i-5)));
-						mull = _mm256_mullo_epi32(q3,  _mm256_loadu_si256((const __m256i*)(data+i-4)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q2,  _mm256_loadu_si256((const __m256i*)(data+i-3)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q1,  _mm256_loadu_si256((const __m256i*)(data+i-2)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q0,  _mm256_loadu_si256((const __m256i*)(data+i-1)));  summ = _mm256_add_epi32(summ, mull);
+						summ = _mm256_mullo_epi32(q4,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-5)));
+						mull = _mm256_mullo_epi32(q3,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-4)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q2,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-3)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q1,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-2)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q0,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-1)));  summ = _mm256_add_epi32(summ, mull);
 						summ = _mm256_sra_epi32(summ, cnt);
-						_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ));
+						_mm256_storeu_si256((__m256i*)(void*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(const void*)(data+i)), summ));
 					}
 				}
 			}
@@ -642,12 +644,12 @@
 
 					for(i = 0; i < (int)data_len-7; i+=8) {
 						__m256i summ, mull;
-						summ = _mm256_mullo_epi32(q3,  _mm256_loadu_si256((const __m256i*)(data+i-4)));
-						mull = _mm256_mullo_epi32(q2,  _mm256_loadu_si256((const __m256i*)(data+i-3)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q1,  _mm256_loadu_si256((const __m256i*)(data+i-2)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q0,  _mm256_loadu_si256((const __m256i*)(data+i-1)));  summ = _mm256_add_epi32(summ, mull);
+						summ = _mm256_mullo_epi32(q3,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-4)));
+						mull = _mm256_mullo_epi32(q2,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-3)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q1,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-2)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q0,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-1)));  summ = _mm256_add_epi32(summ, mull);
 						summ = _mm256_sra_epi32(summ, cnt);
-						_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ));
+						_mm256_storeu_si256((__m256i*)(void*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(const void*)(data+i)), summ));
 					}
 				}
 				else { /* order == 3 */
@@ -658,11 +660,11 @@
 
 					for(i = 0; i < (int)data_len-7; i+=8) {
 						__m256i summ, mull;
-						summ = _mm256_mullo_epi32(q2,  _mm256_loadu_si256((const __m256i*)(data+i-3)));
-						mull = _mm256_mullo_epi32(q1,  _mm256_loadu_si256((const __m256i*)(data+i-2)));  summ = _mm256_add_epi32(summ, mull);
-						mull = _mm256_mullo_epi32(q0,  _mm256_loadu_si256((const __m256i*)(data+i-1)));  summ = _mm256_add_epi32(summ, mull);
+						summ = _mm256_mullo_epi32(q2,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-3)));
+						mull = _mm256_mullo_epi32(q1,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-2)));  summ = _mm256_add_epi32(summ, mull);
+						mull = _mm256_mullo_epi32(q0,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-1)));  summ = _mm256_add_epi32(summ, mull);
 						summ = _mm256_sra_epi32(summ, cnt);
-						_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ));
+						_mm256_storeu_si256((__m256i*)(void*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(const void*)(data+i)), summ));
 					}
 				}
 			}
@@ -674,10 +676,10 @@
 
 					for(i = 0; i < (int)data_len-7; i+=8) {
 						__m256i summ, mull;
-						summ = _mm256_mullo_epi32(q1,  _mm256_loadu_si256((const __m256i*)(data+i-2)));
-						mull = _mm256_mullo_epi32(q0,  _mm256_loadu_si256((const __m256i*)(data+i-1)));  summ = _mm256_add_epi32(summ, mull);
+						summ = _mm256_mullo_epi32(q1,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-2)));
+						mull = _mm256_mullo_epi32(q0,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-1)));  summ = _mm256_add_epi32(summ, mull);
 						summ = _mm256_sra_epi32(summ, cnt);
-						_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ));
+						_mm256_storeu_si256((__m256i*)(void*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(const void*)(data+i)), summ));
 					}
 				}
 				else { /* order == 1 */
@@ -686,9 +688,9 @@
 
 					for(i = 0; i < (int)data_len-7; i+=8) {
 						__m256i summ;
-						summ = _mm256_mullo_epi32(q0,  _mm256_loadu_si256((const __m256i*)(data+i-1)));
+						summ = _mm256_mullo_epi32(q0,  _mm256_loadu_si256((const __m256i*)(const void*)(data+i-1)));
 						summ = _mm256_sra_epi32(summ, cnt);
-						_mm256_storeu_si256((__m256i*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(data+i)), summ));
+						_mm256_storeu_si256((__m256i*)(void*)(residual+i), _mm256_sub_epi32(_mm256_loadu_si256((const __m256i*)(const void*)(data+i)), summ));
 					}
 				}
 			}
@@ -696,17 +698,17 @@
 		for(; i < (int)data_len; i++) {
 			sum = 0;
 			switch(order) {
-				case 12: sum += qlp_coeff[11] * data[i-12];
-				case 11: sum += qlp_coeff[10] * data[i-11];
-				case 10: sum += qlp_coeff[ 9] * data[i-10];
-				case 9:  sum += qlp_coeff[ 8] * data[i- 9];
-				case 8:  sum += qlp_coeff[ 7] * data[i- 8];
-				case 7:  sum += qlp_coeff[ 6] * data[i- 7];
-				case 6:  sum += qlp_coeff[ 5] * data[i- 6];
-				case 5:  sum += qlp_coeff[ 4] * data[i- 5];
-				case 4:  sum += qlp_coeff[ 3] * data[i- 4];
-				case 3:  sum += qlp_coeff[ 2] * data[i- 3];
-				case 2:  sum += qlp_coeff[ 1] * data[i- 2];
+				case 12: sum += qlp_coeff[11] * data[i-12]; /* Falls through. */
+				case 11: sum += qlp_coeff[10] * data[i-11]; /* Falls through. */
+				case 10: sum += qlp_coeff[ 9] * data[i-10]; /* Falls through. */
+				case 9:  sum += qlp_coeff[ 8] * data[i- 9]; /* Falls through. */
+				case 8:  sum += qlp_coeff[ 7] * data[i- 8]; /* Falls through. */
+				case 7:  sum += qlp_coeff[ 6] * data[i- 7]; /* Falls through. */
+				case 6:  sum += qlp_coeff[ 5] * data[i- 6]; /* Falls through. */
+				case 5:  sum += qlp_coeff[ 4] * data[i- 5]; /* Falls through. */
+				case 4:  sum += qlp_coeff[ 3] * data[i- 4]; /* Falls through. */
+				case 3:  sum += qlp_coeff[ 2] * data[i- 3]; /* Falls through. */
+				case 2:  sum += qlp_coeff[ 1] * data[i- 2]; /* Falls through. */
 				case 1:  sum += qlp_coeff[ 0] * data[i- 1];
 			}
 			residual[i] = data[i] - (sum >> lp_quantization);
@@ -716,25 +718,25 @@
 		for(i = 0; i < (int)data_len; i++) {
 			sum = 0;
 			switch(order) {
-				case 32: sum += qlp_coeff[31] * data[i-32];
-				case 31: sum += qlp_coeff[30] * data[i-31];
-				case 30: sum += qlp_coeff[29] * data[i-30];
-				case 29: sum += qlp_coeff[28] * data[i-29];
-				case 28: sum += qlp_coeff[27] * data[i-28];
-				case 27: sum += qlp_coeff[26] * data[i-27];
-				case 26: sum += qlp_coeff[25] * data[i-26];
-				case 25: sum += qlp_coeff[24] * data[i-25];
-				case 24: sum += qlp_coeff[23] * data[i-24];
-				case 23: sum += qlp_coeff[22] * data[i-23];
-				case 22: sum += qlp_coeff[21] * data[i-22];
-				case 21: sum += qlp_coeff[20] * data[i-21];
-				case 20: sum += qlp_coeff[19] * data[i-20];
-				case 19: sum += qlp_coeff[18] * data[i-19];
-				case 18: sum += qlp_coeff[17] * data[i-18];
-				case 17: sum += qlp_coeff[16] * data[i-17];
-				case 16: sum += qlp_coeff[15] * data[i-16];
-				case 15: sum += qlp_coeff[14] * data[i-15];
-				case 14: sum += qlp_coeff[13] * data[i-14];
+				case 32: sum += qlp_coeff[31] * data[i-32]; /* Falls through. */
+				case 31: sum += qlp_coeff[30] * data[i-31]; /* Falls through. */
+				case 30: sum += qlp_coeff[29] * data[i-30]; /* Falls through. */
+				case 29: sum += qlp_coeff[28] * data[i-29]; /* Falls through. */
+				case 28: sum += qlp_coeff[27] * data[i-28]; /* Falls through. */
+				case 27: sum += qlp_coeff[26] * data[i-27]; /* Falls through. */
+				case 26: sum += qlp_coeff[25] * data[i-26]; /* Falls through. */
+				case 25: sum += qlp_coeff[24] * data[i-25]; /* Falls through. */
+				case 24: sum += qlp_coeff[23] * data[i-24]; /* Falls through. */
+				case 23: sum += qlp_coeff[22] * data[i-23]; /* Falls through. */
+				case 22: sum += qlp_coeff[21] * data[i-22]; /* Falls through. */
+				case 21: sum += qlp_coeff[20] * data[i-21]; /* Falls through. */
+				case 20: sum += qlp_coeff[19] * data[i-20]; /* Falls through. */
+				case 19: sum += qlp_coeff[18] * data[i-19]; /* Falls through. */
+				case 18: sum += qlp_coeff[17] * data[i-18]; /* Falls through. */
+				case 17: sum += qlp_coeff[16] * data[i-17]; /* Falls through. */
+				case 16: sum += qlp_coeff[15] * data[i-16]; /* Falls through. */
+				case 15: sum += qlp_coeff[14] * data[i-15]; /* Falls through. */
+				case 14: sum += qlp_coeff[13] * data[i-14]; /* Falls through. */
 				case 13: sum += qlp_coeff[12] * data[i-13];
 				         sum += qlp_coeff[11] * data[i-12];
 				         sum += qlp_coeff[10] * data[i-11];
@@ -758,12 +760,12 @@
 static FLAC__int32 pack_arr[8] = { 0, 2, 4, 6, 1, 3, 5, 7 };
 
 FLAC__SSE_TARGET("avx2")
-void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
+void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[])
 {
 	int i;
 	FLAC__int64 sum;
-	__m128i cnt = _mm_cvtsi32_si128(lp_quantization);
-	__m256i pack = _mm256_loadu_si256((const __m256i *)pack_arr);
+	const __m128i cnt = _mm_cvtsi32_si128(lp_quantization);
+	const __m256i pack = _mm256_loadu_si256((const __m256i *)(const void*)pack_arr);
 
 	FLAC__ASSERT(order > 0);
 	FLAC__ASSERT(order <= 32);
@@ -789,20 +791,20 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m256i summ, mull;
-						summ = _mm256_mul_epi32(q11, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-12))));
-						mull = _mm256_mul_epi32(q10, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-11)))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q9,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-10)))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q8,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-9 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q7,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-8 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q6,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-7 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q5,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-6 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q4,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-5 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q3,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q2,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q1,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q0,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull);
+						summ = _mm256_mul_epi32(q11, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-12))));
+						mull = _mm256_mul_epi32(q10, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-11)))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q9,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-10)))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q8,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-9 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q7,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-8 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q6,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-7 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q5,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-6 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q4,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-5 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q3,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q2,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q1,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q0,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull);
 						summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ)));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), _mm256_castsi256_si128(summ)));
 					}
 				}
 				else { /* order == 11 */
@@ -821,19 +823,19 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m256i summ, mull;
-						summ = _mm256_mul_epi32(q10, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-11))));
-						mull = _mm256_mul_epi32(q9,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-10)))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q8,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-9 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q7,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-8 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q6,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-7 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q5,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-6 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q4,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-5 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q3,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q2,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q1,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q0,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull);
+						summ = _mm256_mul_epi32(q10, _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-11))));
+						mull = _mm256_mul_epi32(q9,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-10)))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q8,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-9 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q7,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-8 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q6,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-7 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q5,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-6 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q4,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-5 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q3,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q2,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q1,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q0,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull);
 						summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ)));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), _mm256_castsi256_si128(summ)));
 					}
 				}
 			}
@@ -853,18 +855,18 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m256i summ, mull;
-						summ = _mm256_mul_epi32(q9,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-10))));
-						mull = _mm256_mul_epi32(q8,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-9 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q7,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-8 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q6,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-7 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q5,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-6 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q4,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-5 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q3,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q2,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q1,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q0,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull);
+						summ = _mm256_mul_epi32(q9,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-10))));
+						mull = _mm256_mul_epi32(q8,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-9 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q7,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-8 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q6,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-7 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q5,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-6 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q4,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-5 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q3,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q2,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q1,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q0,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull);
 						summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ)));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), _mm256_castsi256_si128(summ)));
 					}
 				}
 				else { /* order == 9 */
@@ -881,17 +883,17 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m256i summ, mull;
-						summ = _mm256_mul_epi32(q8,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-9 ))));
-						mull = _mm256_mul_epi32(q7,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-8 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q6,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-7 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q5,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-6 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q4,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-5 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q3,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q2,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q1,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q0,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull);
+						summ = _mm256_mul_epi32(q8,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-9 ))));
+						mull = _mm256_mul_epi32(q7,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-8 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q6,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-7 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q5,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-6 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q4,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-5 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q3,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q2,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q1,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q0,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull);
 						summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ)));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), _mm256_castsi256_si128(summ)));
 					}
 				}
 			}
@@ -911,16 +913,16 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m256i summ, mull;
-						summ = _mm256_mul_epi32(q7,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-8 ))));
-						mull = _mm256_mul_epi32(q6,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-7 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q5,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-6 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q4,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-5 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q3,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q2,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q1,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q0,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull);
+						summ = _mm256_mul_epi32(q7,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-8 ))));
+						mull = _mm256_mul_epi32(q6,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-7 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q5,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-6 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q4,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-5 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q3,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q2,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q1,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q0,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull);
 						summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ)));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), _mm256_castsi256_si128(summ)));
 					}
 				}
 				else { /* order == 7 */
@@ -935,15 +937,15 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m256i summ, mull;
-						summ = _mm256_mul_epi32(q6,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-7 ))));
-						mull = _mm256_mul_epi32(q5,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-6 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q4,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-5 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q3,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q2,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q1,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q0,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull);
+						summ = _mm256_mul_epi32(q6,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-7 ))));
+						mull = _mm256_mul_epi32(q5,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-6 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q4,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-5 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q3,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q2,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q1,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q0,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull);
 						summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ)));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), _mm256_castsi256_si128(summ)));
 					}
 				}
 			}
@@ -959,14 +961,14 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m256i summ, mull;
-						summ = _mm256_mul_epi32(q5,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-6 ))));
-						mull = _mm256_mul_epi32(q4,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-5 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q3,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q2,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q1,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q0,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull);
+						summ = _mm256_mul_epi32(q5,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-6 ))));
+						mull = _mm256_mul_epi32(q4,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-5 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q3,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q2,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q1,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q0,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull);
 						summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ)));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), _mm256_castsi256_si128(summ)));
 					}
 				}
 				else { /* order == 5 */
@@ -979,13 +981,13 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m256i summ, mull;
-						summ = _mm256_mul_epi32(q4,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-5 ))));
-						mull = _mm256_mul_epi32(q3,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q2,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q1,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q0,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull);
+						summ = _mm256_mul_epi32(q4,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-5 ))));
+						mull = _mm256_mul_epi32(q3,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-4 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q2,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q1,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q0,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull);
 						summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ)));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), _mm256_castsi256_si128(summ)));
 					}
 				}
 			}
@@ -1001,12 +1003,12 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m256i summ, mull;
-						summ = _mm256_mul_epi32(q3,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-4 ))));
-						mull = _mm256_mul_epi32(q2,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q1,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q0,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull);
+						summ = _mm256_mul_epi32(q3,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-4 ))));
+						mull = _mm256_mul_epi32(q2,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-3 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q1,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q0,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull);
 						summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ)));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), _mm256_castsi256_si128(summ)));
 					}
 				}
 				else { /* order == 3 */
@@ -1017,11 +1019,11 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m256i summ, mull;
-						summ = _mm256_mul_epi32(q2,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-3 ))));
-						mull = _mm256_mul_epi32(q1,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull);
-						mull = _mm256_mul_epi32(q0,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull);
+						summ = _mm256_mul_epi32(q2,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-3 ))));
+						mull = _mm256_mul_epi32(q1,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-2 )))); summ = _mm256_add_epi64(summ, mull);
+						mull = _mm256_mul_epi32(q0,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull);
 						summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ)));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), _mm256_castsi256_si128(summ)));
 					}
 				}
 			}
@@ -1033,10 +1035,10 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m256i summ, mull;
-						summ = _mm256_mul_epi32(q1,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-2 ))));
-						mull = _mm256_mul_epi32(q0,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull);
+						summ = _mm256_mul_epi32(q1,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-2 ))));
+						mull = _mm256_mul_epi32(q0,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-1 )))); summ = _mm256_add_epi64(summ, mull);
 						summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ)));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), _mm256_castsi256_si128(summ)));
 					}
 				}
 				else { /* order == 1 */
@@ -1045,9 +1047,9 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m256i summ;
-						summ = _mm256_mul_epi32(q0,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(data+i-1 ))));
+						summ = _mm256_mul_epi32(q0,  _mm256_cvtepu32_epi64(_mm_loadu_si128((const __m128i*)(const void*)(data+i-1 ))));
 						summ = _mm256_permutevar8x32_epi32(_mm256_srl_epi64(summ, cnt), pack);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), _mm256_castsi256_si128(summ)));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), _mm256_castsi256_si128(summ)));
 					}
 				}
 			}
@@ -1055,17 +1057,17 @@
 		for(; i < (int)data_len; i++) {
 			sum = 0;
 			switch(order) {
-				case 12: sum += qlp_coeff[11] * (FLAC__int64)data[i-12];
-				case 11: sum += qlp_coeff[10] * (FLAC__int64)data[i-11];
-				case 10: sum += qlp_coeff[ 9] * (FLAC__int64)data[i-10];
-				case 9:  sum += qlp_coeff[ 8] * (FLAC__int64)data[i- 9];
-				case 8:  sum += qlp_coeff[ 7] * (FLAC__int64)data[i- 8];
-				case 7:  sum += qlp_coeff[ 6] * (FLAC__int64)data[i- 7];
-				case 6:  sum += qlp_coeff[ 5] * (FLAC__int64)data[i- 6];
-				case 5:  sum += qlp_coeff[ 4] * (FLAC__int64)data[i- 5];
-				case 4:  sum += qlp_coeff[ 3] * (FLAC__int64)data[i- 4];
-				case 3:  sum += qlp_coeff[ 2] * (FLAC__int64)data[i- 3];
-				case 2:  sum += qlp_coeff[ 1] * (FLAC__int64)data[i- 2];
+				case 12: sum += qlp_coeff[11] * (FLAC__int64)data[i-12]; /* Falls through. */
+				case 11: sum += qlp_coeff[10] * (FLAC__int64)data[i-11]; /* Falls through. */
+				case 10: sum += qlp_coeff[ 9] * (FLAC__int64)data[i-10]; /* Falls through. */
+				case 9:  sum += qlp_coeff[ 8] * (FLAC__int64)data[i- 9]; /* Falls through. */
+				case 8:  sum += qlp_coeff[ 7] * (FLAC__int64)data[i- 8]; /* Falls through. */
+				case 7:  sum += qlp_coeff[ 6] * (FLAC__int64)data[i- 7]; /* Falls through. */
+				case 6:  sum += qlp_coeff[ 5] * (FLAC__int64)data[i- 6]; /* Falls through. */
+				case 5:  sum += qlp_coeff[ 4] * (FLAC__int64)data[i- 5]; /* Falls through. */
+				case 4:  sum += qlp_coeff[ 3] * (FLAC__int64)data[i- 4]; /* Falls through. */
+				case 3:  sum += qlp_coeff[ 2] * (FLAC__int64)data[i- 3]; /* Falls through. */
+				case 2:  sum += qlp_coeff[ 1] * (FLAC__int64)data[i- 2]; /* Falls through. */
 				case 1:  sum += qlp_coeff[ 0] * (FLAC__int64)data[i- 1];
 			}
 			residual[i] = data[i] - (FLAC__int32)(sum >> lp_quantization);
@@ -1075,25 +1077,25 @@
 		for(i = 0; i < (int)data_len; i++) {
 			sum = 0;
 			switch(order) {
-				case 32: sum += qlp_coeff[31] * (FLAC__int64)data[i-32];
-				case 31: sum += qlp_coeff[30] * (FLAC__int64)data[i-31];
-				case 30: sum += qlp_coeff[29] * (FLAC__int64)data[i-30];
-				case 29: sum += qlp_coeff[28] * (FLAC__int64)data[i-29];
-				case 28: sum += qlp_coeff[27] * (FLAC__int64)data[i-28];
-				case 27: sum += qlp_coeff[26] * (FLAC__int64)data[i-27];
-				case 26: sum += qlp_coeff[25] * (FLAC__int64)data[i-26];
-				case 25: sum += qlp_coeff[24] * (FLAC__int64)data[i-25];
-				case 24: sum += qlp_coeff[23] * (FLAC__int64)data[i-24];
-				case 23: sum += qlp_coeff[22] * (FLAC__int64)data[i-23];
-				case 22: sum += qlp_coeff[21] * (FLAC__int64)data[i-22];
-				case 21: sum += qlp_coeff[20] * (FLAC__int64)data[i-21];
-				case 20: sum += qlp_coeff[19] * (FLAC__int64)data[i-20];
-				case 19: sum += qlp_coeff[18] * (FLAC__int64)data[i-19];
-				case 18: sum += qlp_coeff[17] * (FLAC__int64)data[i-18];
-				case 17: sum += qlp_coeff[16] * (FLAC__int64)data[i-17];
-				case 16: sum += qlp_coeff[15] * (FLAC__int64)data[i-16];
-				case 15: sum += qlp_coeff[14] * (FLAC__int64)data[i-15];
-				case 14: sum += qlp_coeff[13] * (FLAC__int64)data[i-14];
+				case 32: sum += qlp_coeff[31] * (FLAC__int64)data[i-32]; /* Falls through. */
+				case 31: sum += qlp_coeff[30] * (FLAC__int64)data[i-31]; /* Falls through. */
+				case 30: sum += qlp_coeff[29] * (FLAC__int64)data[i-30]; /* Falls through. */
+				case 29: sum += qlp_coeff[28] * (FLAC__int64)data[i-29]; /* Falls through. */
+				case 28: sum += qlp_coeff[27] * (FLAC__int64)data[i-28]; /* Falls through. */
+				case 27: sum += qlp_coeff[26] * (FLAC__int64)data[i-27]; /* Falls through. */
+				case 26: sum += qlp_coeff[25] * (FLAC__int64)data[i-26]; /* Falls through. */
+				case 25: sum += qlp_coeff[24] * (FLAC__int64)data[i-25]; /* Falls through. */
+				case 24: sum += qlp_coeff[23] * (FLAC__int64)data[i-24]; /* Falls through. */
+				case 23: sum += qlp_coeff[22] * (FLAC__int64)data[i-23]; /* Falls through. */
+				case 22: sum += qlp_coeff[21] * (FLAC__int64)data[i-22]; /* Falls through. */
+				case 21: sum += qlp_coeff[20] * (FLAC__int64)data[i-21]; /* Falls through. */
+				case 20: sum += qlp_coeff[19] * (FLAC__int64)data[i-20]; /* Falls through. */
+				case 19: sum += qlp_coeff[18] * (FLAC__int64)data[i-19]; /* Falls through. */
+				case 18: sum += qlp_coeff[17] * (FLAC__int64)data[i-18]; /* Falls through. */
+				case 17: sum += qlp_coeff[16] * (FLAC__int64)data[i-17]; /* Falls through. */
+				case 16: sum += qlp_coeff[15] * (FLAC__int64)data[i-16]; /* Falls through. */
+				case 15: sum += qlp_coeff[14] * (FLAC__int64)data[i-15]; /* Falls through. */
+				case 14: sum += qlp_coeff[13] * (FLAC__int64)data[i-14]; /* Falls through. */
 				case 13: sum += qlp_coeff[12] * (FLAC__int64)data[i-13];
 				         sum += qlp_coeff[11] * (FLAC__int64)data[i-12];
 				         sum += qlp_coeff[10] * (FLAC__int64)data[i-11];

diff --git a/src/libFLAC/lpc_intrin_fma.c b/src/libFLAC/lpc_intrin_fma.c
new file mode 100644
index 0000000..396ff30
--- /dev/null
+++ b/src/libFLAC/lpc_intrin_fma.c

@@ -0,0 +1,73 @@
+/* libFLAC - Free Lossless Audio Codec library
+ * Copyright (C) 2022 Xiph.Org Foundation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * - Neither the name of the Xiph.org Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
+#include "private/cpu.h"
+
+#ifndef FLAC__INTEGER_ONLY_LIBRARY
+#ifndef FLAC__NO_ASM
+#if defined FLAC__CPU_X86_64 && FLAC__HAS_X86INTRIN
+#include "private/lpc.h"
+#ifdef FLAC__FMA_SUPPORTED
+
+#include "FLAC/assert.h"
+
+FLAC__SSE_TARGET("fma") /* asks for the "fma" target on this one function; macro comes from outside this file -- presumably a target attribute, confirm in private/cpu.h */
+void FLAC__lpc_compute_autocorrelation_intrin_fma_lag_8(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[])
+{ /* FMA autocorrelation entry point, MAX_LAG 8 instantiation: the entire body is the template included below */
+#undef MAX_LAG /* drop any MAX_LAG left over from earlier in this translation unit */
+#define MAX_LAG 8 /* the only thing that differs between the lag_8/12/16 variants; presumably the largest lag the template handles -- confirm there */
+#include "deduplication/lpc_compute_autocorrelation_intrin.c" /* shared body, pasted inside this function so it can see data, data_len, lag and autoc (exact contract: TODO confirm in the template) */
+}
+
+FLAC__SSE_TARGET("fma") /* asks for the "fma" target on this one function; macro comes from outside this file -- presumably a target attribute, confirm in private/cpu.h */
+void FLAC__lpc_compute_autocorrelation_intrin_fma_lag_12(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[])
+{ /* FMA autocorrelation entry point, MAX_LAG 12 instantiation: the entire body is the template included below */
+#undef MAX_LAG /* drop the value set for the previous instantiation */
+#define MAX_LAG 12 /* the only thing that differs between the lag_8/12/16 variants; presumably the largest lag the template handles -- confirm there */
+#include "deduplication/lpc_compute_autocorrelation_intrin.c" /* shared body, pasted inside this function so it can see data, data_len, lag and autoc (exact contract: TODO confirm in the template) */
+}
+FLAC__SSE_TARGET("fma") /* asks for the "fma" target on this one function; macro comes from outside this file -- presumably a target attribute, confirm in private/cpu.h */
+void FLAC__lpc_compute_autocorrelation_intrin_fma_lag_16(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[])
+{ /* FMA autocorrelation entry point, MAX_LAG 16 instantiation: the entire body is the template included below */
+#undef MAX_LAG /* drop the value set for the previous instantiation */
+#define MAX_LAG 16 /* the only thing that differs between the lag_8/12/16 variants; presumably the largest lag the template handles -- confirm there */
+#include "deduplication/lpc_compute_autocorrelation_intrin.c" /* shared body, pasted inside this function so it can see data, data_len, lag and autoc (exact contract: TODO confirm in the template) */
+
+}
+
+#endif /* FLAC__FMA_SUPPORTED */
+#endif /* FLAC__CPU_X86_64 && FLAC__HAS_X86INTRIN */
+#endif /* FLAC__NO_ASM */
+#endif /* FLAC__INTEGER_ONLY_LIBRARY */

diff --git a/src/libFLAC/lpc_intrin_neon.c b/src/libFLAC/lpc_intrin_neon.c
new file mode 100644
index 0000000..0ba4501
--- /dev/null
+++ b/src/libFLAC/lpc_intrin_neon.c

@@ -0,0 +1,1273 @@
+/* libFLAC - Free Lossless Audio Codec library
+ * Copyright (C) 2000-2009  Josh Coalson
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * - Neither the name of the Xiph.org Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "private/cpu.h"
+
+#ifndef FLAC__INTEGER_ONLY_LIBRARY
+#ifndef FLAC__NO_ASM
+#if defined FLAC__CPU_ARM64 && FLAC__HAS_NEONINTRIN
+#include "private/lpc.h"
+#include "FLAC/assert.h"
+#include "FLAC/format.h"
+#include "private/macros.h"
+#include <arm_neon.h>
+
+#if FLAC__HAS_A64NEONINTRIN
+void FLAC__lpc_compute_autocorrelation_intrin_neon_lag_14(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[])
+{ /* NEON autocorrelation entry point, MAX_LAG 14 instantiation; only built when FLAC__HAS_A64NEONINTRIN is non-zero. The entire body is the template included below */
+#undef MAX_LAG /* drop any MAX_LAG left over from earlier in this translation unit */
+#define MAX_LAG 14 /* the only thing that differs between the lag_14/10/8 variants; presumably the largest lag the template handles -- confirm there */
+#include "deduplication/lpc_compute_autocorrelation_intrin_neon.c" /* shared body, pasted inside this function so it can see data, data_len, lag and autoc (exact contract: TODO confirm in the template) */
+}
+
+void FLAC__lpc_compute_autocorrelation_intrin_neon_lag_10(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[])
+{ /* NEON autocorrelation entry point, MAX_LAG 10 instantiation; only built when FLAC__HAS_A64NEONINTRIN is non-zero. The entire body is the template included below */
+#undef MAX_LAG /* drop the value set for the previous instantiation */
+#define MAX_LAG 10 /* the only thing that differs between the lag_14/10/8 variants; presumably the largest lag the template handles -- confirm there */
+#include "deduplication/lpc_compute_autocorrelation_intrin_neon.c" /* shared body, pasted inside this function so it can see data, data_len, lag and autoc (exact contract: TODO confirm in the template) */
+}
+
+void FLAC__lpc_compute_autocorrelation_intrin_neon_lag_8(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[])
+{ /* NEON autocorrelation entry point, MAX_LAG 8 instantiation; only built when FLAC__HAS_A64NEONINTRIN is non-zero. The entire body is the template included below */
+#undef MAX_LAG /* drop the value set for the previous instantiation */
+#define MAX_LAG 8 /* the only thing that differs between the lag_14/10/8 variants; presumably the largest lag the template handles -- confirm there */
+#include "deduplication/lpc_compute_autocorrelation_intrin_neon.c" /* shared body, pasted inside this function so it can see data, data_len, lag and autoc (exact contract: TODO confirm in the template) */
+}
+
+#endif /* FLAC__HAS_A64NEONINTRIN */
+
+
+#define MUL_32_BIT_LOOP_UNROOL_3(qlp_coeff_vec, lane) /* starts three accumulators: summ_k = tmp_vec[4*k] * (lane `lane` of qlp_coeff_vec), k = 0..2 */ \
+                        summ_0 = vmulq_laneq_s32(tmp_vec[0], qlp_coeff_vec, lane); /* summ_0..summ_2 and tmp_vec are picked up from the expansion site, not passed in */ \
+                        summ_1 = vmulq_laneq_s32(tmp_vec[4], qlp_coeff_vec, lane); /* expands to three complete statements, so call sites write no trailing semicolon */ \
+                        summ_2 = vmulq_laneq_s32(tmp_vec[8], qlp_coeff_vec, lane); 
+                        
+
+#define MACC_32BIT_LOOP_UNROOL_3(tmp_vec_ind, qlp_coeff_vec, lane) /* multiply-accumulate step: summ_k += tmp_vec[tmp_vec_ind + 4*k] * (lane `lane` of qlp_coeff_vec), k = 0..2 */ \
+                        summ_0 = vmlaq_laneq_s32(summ_0,tmp_vec[tmp_vec_ind] ,qlp_coeff_vec, lane); /* summ_0..summ_2 and tmp_vec are picked up from the expansion site, not passed in */ \
+                        summ_1 = vmlaq_laneq_s32(summ_1,tmp_vec[tmp_vec_ind+4] ,qlp_coeff_vec, lane); /* tmp_vec_ind is pasted unparenthesised, so pass a plain index as the call sites do */ \
+                        summ_2 = vmlaq_laneq_s32(summ_2,tmp_vec[tmp_vec_ind+8] ,qlp_coeff_vec, lane);
+                        
+void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_neon(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[])
+{
+    int i;
+    FLAC__int32 sum;
+    int32x4_t tmp_vec[20];
+
+    FLAC__ASSERT(order > 0);
+    FLAC__ASSERT(order <= 32);
+
+    // Using prologue reads is valid as encoder->private_->local_lpc_compute_residual_from_qlp_coefficients(signal+order,....)
+    if(order <= 12) {
+        if(order > 8) {
+            if(order > 10) {
+                if (order == 12) {
+                    int32x4_t qlp_coeff_0 = {qlp_coeff[0], qlp_coeff[1], qlp_coeff[2], qlp_coeff[3]};
+                    int32x4_t qlp_coeff_1 = {qlp_coeff[4], qlp_coeff[5], qlp_coeff[6], qlp_coeff[7]};
+                    int32x4_t qlp_coeff_2 = {qlp_coeff[8], qlp_coeff[9], qlp_coeff[10], qlp_coeff[11]};
+
+                    tmp_vec[0] = vld1q_s32(data - 12);
+                    tmp_vec[1] = vld1q_s32(data - 11);
+                    tmp_vec[2] = vld1q_s32(data - 10);
+                    tmp_vec[3] = vld1q_s32(data - 9);
+                    tmp_vec[4] = vld1q_s32(data - 8);
+                    tmp_vec[5] = vld1q_s32(data - 7);
+                    tmp_vec[6] = vld1q_s32(data - 6);
+                    tmp_vec[7] = vld1q_s32(data - 5);
+
+                    for (i = 0; i < (int)data_len - 11; i += 12)
+                    {
+                        int32x4_t summ_0, summ_1, summ_2;
+
+                        tmp_vec[8] = vld1q_s32(data + i - 4);
+                        tmp_vec[9] = vld1q_s32(data+i-3);
+                        tmp_vec[10] = vld1q_s32(data+i-2);
+                        tmp_vec[11] = vld1q_s32(data+i-1);
+                        tmp_vec[12] = vld1q_s32(data+i);
+                        tmp_vec[13] = vld1q_s32(data+i+1);
+                        tmp_vec[14] = vld1q_s32(data+i+2);
+                        tmp_vec[15] = vld1q_s32(data+i+3);
+                        tmp_vec[16] = vld1q_s32(data + i + 4);
+                        tmp_vec[17] = vld1q_s32(data + i + 5);
+                        tmp_vec[18] = vld1q_s32(data + i + 6);
+                        tmp_vec[19] = vld1q_s32(data + i + 7);
+
+                        MUL_32_BIT_LOOP_UNROOL_3(qlp_coeff_2, 3)
+                        MACC_32BIT_LOOP_UNROOL_3(1, qlp_coeff_2, 2)
+                        MACC_32BIT_LOOP_UNROOL_3(2, qlp_coeff_2, 1)
+                        MACC_32BIT_LOOP_UNROOL_3(3, qlp_coeff_2, 0)
+                        MACC_32BIT_LOOP_UNROOL_3(4, qlp_coeff_1, 3)
+                        MACC_32BIT_LOOP_UNROOL_3(5, qlp_coeff_1, 2)
+                        MACC_32BIT_LOOP_UNROOL_3(6, qlp_coeff_1, 1)
+                        MACC_32BIT_LOOP_UNROOL_3(7, qlp_coeff_1, 0)
+                        MACC_32BIT_LOOP_UNROOL_3(8, qlp_coeff_0, 3)
+                        MACC_32BIT_LOOP_UNROOL_3(9, qlp_coeff_0, 2)
+                        MACC_32BIT_LOOP_UNROOL_3(10, qlp_coeff_0, 1)
+                        MACC_32BIT_LOOP_UNROOL_3(11, qlp_coeff_0, 0)
+
+                        vst1q_s32(residual+i + 0, vsubq_s32(vld1q_s32(data+i + 0) , vshlq_s32(summ_0,vdupq_n_s32(-lp_quantization))));
+                        vst1q_s32(residual+i + 4, vsubq_s32(vld1q_s32(data+i + 4) , vshlq_s32(summ_1,vdupq_n_s32(-lp_quantization))));
+                        vst1q_s32(residual+i + 8, vsubq_s32(vld1q_s32(data+i + 8) , vshlq_s32(summ_2,vdupq_n_s32(-lp_quantization))));
+
+                        tmp_vec[0] = tmp_vec[12];
+                        tmp_vec[1] = tmp_vec[13];
+                        tmp_vec[2] = tmp_vec[14];
+                        tmp_vec[3] = tmp_vec[15];
+                        tmp_vec[4] = tmp_vec[16];
+                        tmp_vec[5] = tmp_vec[17];
+                        tmp_vec[6] = tmp_vec[18];
+                        tmp_vec[7] = tmp_vec[19];
+                    }
+                }
+
+                else { /* order == 11 */
+                    int32x4_t qlp_coeff_0 = {qlp_coeff[0], qlp_coeff[1], qlp_coeff[2], qlp_coeff[3]};
+                    int32x4_t qlp_coeff_1 = {qlp_coeff[4], qlp_coeff[5], qlp_coeff[6], qlp_coeff[7]};
+                    int32x4_t qlp_coeff_2 = {qlp_coeff[8], qlp_coeff[9], qlp_coeff[10], 0};
+
+                    tmp_vec[0] = vld1q_s32(data - 11);
+                    tmp_vec[1] = vld1q_s32(data - 10);
+                    tmp_vec[2] = vld1q_s32(data - 9);
+                    tmp_vec[3] = vld1q_s32(data - 8);
+                    tmp_vec[4] = vld1q_s32(data - 7);
+                    tmp_vec[5] = vld1q_s32(data - 6);
+                    tmp_vec[6] = vld1q_s32(data - 5);
+                    
+                    for (i = 0; i < (int)data_len - 11; i += 12)
+                    {
+                        int32x4_t summ_0, summ_1, summ_2;
+                        tmp_vec[7] = vld1q_s32(data + i - 4);
+                        tmp_vec[8] = vld1q_s32(data + i - 3);
+                        tmp_vec[9] = vld1q_s32(data + i - 2);
+                        tmp_vec[10] = vld1q_s32(data + i - 1);
+                        tmp_vec[11] = vld1q_s32(data + i - 0);
+                        tmp_vec[12] = vld1q_s32(data + i + 1);
+                        tmp_vec[13] = vld1q_s32(data + i + 2);
+                        tmp_vec[14] = vld1q_s32(data + i + 3);
+                        tmp_vec[15] = vld1q_s32(data + i + 4);
+                        tmp_vec[16] = vld1q_s32(data + i + 5);
+                        tmp_vec[17] = vld1q_s32(data + i + 6);
+                        tmp_vec[18] = vld1q_s32(data + i + 7);
+                        
+                      
+                        MUL_32_BIT_LOOP_UNROOL_3(qlp_coeff_2, 2)
+                        MACC_32BIT_LOOP_UNROOL_3(1, qlp_coeff_2, 1)
+                        MACC_32BIT_LOOP_UNROOL_3(2, qlp_coeff_2, 0)
+                        MACC_32BIT_LOOP_UNROOL_3(3, qlp_coeff_1, 3)
+                        MACC_32BIT_LOOP_UNROOL_3(4, qlp_coeff_1, 2)
+                        MACC_32BIT_LOOP_UNROOL_3(5, qlp_coeff_1, 1)
+                        MACC_32BIT_LOOP_UNROOL_3(6, qlp_coeff_1, 0)
+                        MACC_32BIT_LOOP_UNROOL_3(7, qlp_coeff_0, 3)
+                        MACC_32BIT_LOOP_UNROOL_3(8, qlp_coeff_0, 2)
+                        MACC_32BIT_LOOP_UNROOL_3(9, qlp_coeff_0, 1)
+                        MACC_32BIT_LOOP_UNROOL_3(10, qlp_coeff_0, 0)
+                        
+                        vst1q_s32(residual+i + 0, vsubq_s32(vld1q_s32(data+i + 0) , vshlq_s32(summ_0,vdupq_n_s32(-lp_quantization))));
+                        vst1q_s32(residual+i + 4, vsubq_s32(vld1q_s32(data+i + 4) , vshlq_s32(summ_1,vdupq_n_s32(-lp_quantization))));
+                        vst1q_s32(residual+i + 8, vsubq_s32(vld1q_s32(data+i + 8) , vshlq_s32(summ_2,vdupq_n_s32(-lp_quantization))));
+
+                        
+                        tmp_vec[0] = tmp_vec[12];
+                        tmp_vec[1] = tmp_vec[13];
+                        tmp_vec[2] = tmp_vec[14];
+                        tmp_vec[3] = tmp_vec[15];
+                        tmp_vec[4] = tmp_vec[16];
+                        tmp_vec[5] = tmp_vec[17];
+                        tmp_vec[6] = tmp_vec[18];
+                    }
+                }
+            }
+            else {
+                if(order == 10) {
+                    int32x4_t qlp_coeff_0 = {qlp_coeff[0], qlp_coeff[1], qlp_coeff[2], qlp_coeff[3]};
+                    int32x4_t qlp_coeff_1 = {qlp_coeff[4], qlp_coeff[5], qlp_coeff[6], qlp_coeff[7]};
+                    int32x4_t qlp_coeff_2 = {qlp_coeff[8], qlp_coeff[9], 0, 0};
+
+                    tmp_vec[0] = vld1q_s32(data - 10);
+                    tmp_vec[1] = vld1q_s32(data - 9);
+                    tmp_vec[2] = vld1q_s32(data - 8);
+                    tmp_vec[3] = vld1q_s32(data - 7);
+                    tmp_vec[4] = vld1q_s32(data - 6);
+                    tmp_vec[5] = vld1q_s32(data - 5);
+                    
+                    for (i = 0; i < (int)data_len - 11; i += 12)
+                    {
+                        int32x4_t summ_0, summ_1, summ_2;
+                        tmp_vec[6] = vld1q_s32(data + i - 4);
+                        tmp_vec[7] = vld1q_s32(data + i - 3);
+                        tmp_vec[8] = vld1q_s32(data + i - 2);
+                        tmp_vec[9] = vld1q_s32(data + i - 1);
+                        tmp_vec[10] = vld1q_s32(data + i - 0);
+                        tmp_vec[11] = vld1q_s32(data + i + 1);
+                        tmp_vec[12] = vld1q_s32(data + i + 2);
+                        tmp_vec[13] = vld1q_s32(data + i + 3);
+                        tmp_vec[14] = vld1q_s32(data + i + 4);
+                        tmp_vec[15] = vld1q_s32(data + i + 5);
+                        tmp_vec[16] = vld1q_s32(data + i + 6);
+                        tmp_vec[17] = vld1q_s32(data + i + 7);
+                        
+                            
+                        MUL_32_BIT_LOOP_UNROOL_3(qlp_coeff_2, 1)
+                        MACC_32BIT_LOOP_UNROOL_3(1, qlp_coeff_2, 0)
+                        MACC_32BIT_LOOP_UNROOL_3(2, qlp_coeff_1, 3)
+                        MACC_32BIT_LOOP_UNROOL_3(3, qlp_coeff_1, 2)
+                        MACC_32BIT_LOOP_UNROOL_3(4, qlp_coeff_1, 1)
+                        MACC_32BIT_LOOP_UNROOL_3(5, qlp_coeff_1, 0)
+                        MACC_32BIT_LOOP_UNROOL_3(6, qlp_coeff_0, 3)
+                        MACC_32BIT_LOOP_UNROOL_3(7, qlp_coeff_0, 2)
+                        MACC_32BIT_LOOP_UNROOL_3(8, qlp_coeff_0, 1)
+                        MACC_32BIT_LOOP_UNROOL_3(9, qlp_coeff_0, 0)
+                        
+                        vst1q_s32(residual+i + 0, vsubq_s32(vld1q_s32(data+i + 0) , vshlq_s32(summ_0,vdupq_n_s32(-lp_quantization))));
+                        vst1q_s32(residual+i + 4, vsubq_s32(vld1q_s32(data+i + 4) , vshlq_s32(summ_1,vdupq_n_s32(-lp_quantization))));
+                        vst1q_s32(residual+i + 8, vsubq_s32(vld1q_s32(data+i + 8) , vshlq_s32(summ_2,vdupq_n_s32(-lp_quantization))));
+
+                        
+                        tmp_vec[0] = tmp_vec[12];
+                        tmp_vec[1] = tmp_vec[13];
+                        tmp_vec[2] = tmp_vec[14];
+                        tmp_vec[3] = tmp_vec[15];
+                        tmp_vec[4] = tmp_vec[16];
+                        tmp_vec[5] = tmp_vec[17];
+                    }
+                }
+                else { /* order == 9 */
+                    int32x4_t qlp_coeff_0 = {qlp_coeff[0], qlp_coeff[1], qlp_coeff[2], qlp_coeff[3]};
+                    int32x4_t qlp_coeff_1 = {qlp_coeff[4], qlp_coeff[5], qlp_coeff[6], qlp_coeff[7]};
+                    int32x4_t qlp_coeff_2 = {qlp_coeff[8], 0, 0, 0};
+
+                    tmp_vec[0] = vld1q_s32(data - 9);
+                    tmp_vec[1] = vld1q_s32(data - 8);
+                    tmp_vec[2] = vld1q_s32(data - 7);
+                    tmp_vec[3] = vld1q_s32(data - 6);
+                    tmp_vec[4] = vld1q_s32(data - 5);
+                    
+                    for (i = 0; i < (int)data_len - 11; i += 12)
+                    {
+                        int32x4_t summ_0, summ_1, summ_2;
+                        tmp_vec[5] = vld1q_s32(data + i - 4);
+                        tmp_vec[6] = vld1q_s32(data + i - 3);
+                        tmp_vec[7] = vld1q_s32(data + i - 2);
+                        tmp_vec[8] = vld1q_s32(data + i - 1);
+                        tmp_vec[9] = vld1q_s32(data + i - 0);
+                        tmp_vec[10] = vld1q_s32(data + i + 1);
+                        tmp_vec[11] = vld1q_s32(data + i + 2);
+                        tmp_vec[12] = vld1q_s32(data + i + 3);
+                        tmp_vec[13] = vld1q_s32(data + i + 4);
+                        tmp_vec[14] = vld1q_s32(data + i + 5);
+                        tmp_vec[15] = vld1q_s32(data + i + 6);
+                        tmp_vec[16] = vld1q_s32(data + i + 7);
+                        
+                        MUL_32_BIT_LOOP_UNROOL_3(qlp_coeff_2, 0)
+                        MACC_32BIT_LOOP_UNROOL_3(1, qlp_coeff_1, 3)
+                        MACC_32BIT_LOOP_UNROOL_3(2, qlp_coeff_1, 2)
+                        MACC_32BIT_LOOP_UNROOL_3(3, qlp_coeff_1, 1)
+                        MACC_32BIT_LOOP_UNROOL_3(4, qlp_coeff_1, 0)
+                        MACC_32BIT_LOOP_UNROOL_3(5, qlp_coeff_0, 3)
+                        MACC_32BIT_LOOP_UNROOL_3(6, qlp_coeff_0, 2)
+                        MACC_32BIT_LOOP_UNROOL_3(7, qlp_coeff_0, 1)
+                        MACC_32BIT_LOOP_UNROOL_3(8, qlp_coeff_0, 0)
+
+                        vst1q_s32(residual+i + 0, vsubq_s32(vld1q_s32(data+i + 0) , vshlq_s32(summ_0,vdupq_n_s32(-lp_quantization))));
+                        vst1q_s32(residual+i + 4, vsubq_s32(vld1q_s32(data+i + 4) , vshlq_s32(summ_1,vdupq_n_s32(-lp_quantization))));
+                        vst1q_s32(residual+i + 8, vsubq_s32(vld1q_s32(data+i + 8) , vshlq_s32(summ_2,vdupq_n_s32(-lp_quantization))));
+
+                        tmp_vec[0] = tmp_vec[12];
+                        tmp_vec[1] = tmp_vec[13];
+                        tmp_vec[2] = tmp_vec[14];
+                        tmp_vec[3] = tmp_vec[15];
+                        tmp_vec[4] = tmp_vec[16];
+                    }
+                }
+            }
+        }
+        else if(order > 4) {
+            if(order > 6) {
+                if(order == 8) {
+                    int32x4_t qlp_coeff_0 = {qlp_coeff[0], qlp_coeff[1], qlp_coeff[2], qlp_coeff[3]};
+                    int32x4_t qlp_coeff_1 = {qlp_coeff[4], qlp_coeff[5], qlp_coeff[6], qlp_coeff[7]};
+
+                    tmp_vec[0] = vld1q_s32(data - 8);
+                    tmp_vec[1] = vld1q_s32(data - 7);
+                    tmp_vec[2] = vld1q_s32(data - 6);
+                    tmp_vec[3] = vld1q_s32(data - 5);
+                    
+                    for (i = 0; i < (int)data_len - 11; i += 12)
+                    {
+                        int32x4_t summ_0, summ_1, summ_2;
+                        tmp_vec[4] = vld1q_s32(data + i - 4);
+                        tmp_vec[5] = vld1q_s32(data + i - 3);
+                        tmp_vec[6] = vld1q_s32(data + i - 2);
+                        tmp_vec[7] = vld1q_s32(data + i - 1);
+                        tmp_vec[8] = vld1q_s32(data + i - 0);
+                        tmp_vec[9] = vld1q_s32(data + i + 1);
+                        tmp_vec[10] = vld1q_s32(data + i + 2);
+                        tmp_vec[11] = vld1q_s32(data + i + 3);
+                        tmp_vec[12] = vld1q_s32(data + i + 4);
+                        tmp_vec[13] = vld1q_s32(data + i + 5);
+                        tmp_vec[14] = vld1q_s32(data + i + 6);
+                        tmp_vec[15] = vld1q_s32(data + i + 7);
+                        
+                        MUL_32_BIT_LOOP_UNROOL_3(qlp_coeff_1, 3)
+                        MACC_32BIT_LOOP_UNROOL_3(1, qlp_coeff_1, 2)
+                        MACC_32BIT_LOOP_UNROOL_3(2, qlp_coeff_1, 1)
+                        MACC_32BIT_LOOP_UNROOL_3(3, qlp_coeff_1, 0)
+                        MACC_32BIT_LOOP_UNROOL_3(4, qlp_coeff_0, 3)
+                        MACC_32BIT_LOOP_UNROOL_3(5, qlp_coeff_0, 2)
+                        MACC_32BIT_LOOP_UNROOL_3(6, qlp_coeff_0, 1)
+                        MACC_32BIT_LOOP_UNROOL_3(7, qlp_coeff_0, 0)
+
+                        vst1q_s32(residual+i + 0, vsubq_s32(vld1q_s32(data+i + 0) , vshlq_s32(summ_0,vdupq_n_s32(-lp_quantization))));
+                        vst1q_s32(residual+i + 4, vsubq_s32(vld1q_s32(data+i + 4) , vshlq_s32(summ_1,vdupq_n_s32(-lp_quantization))));
+                        vst1q_s32(residual+i + 8, vsubq_s32(vld1q_s32(data+i + 8) , vshlq_s32(summ_2,vdupq_n_s32(-lp_quantization))));
+
+                        tmp_vec[0] = tmp_vec[12];
+                        tmp_vec[1] = tmp_vec[13];
+                        tmp_vec[2] = tmp_vec[14];
+                        tmp_vec[3] = tmp_vec[15];
+                    }
+                }
+                else { /* order == 7 */
+                    int32x4_t qlp_coeff_0 = {qlp_coeff[0], qlp_coeff[1], qlp_coeff[2], qlp_coeff[3]};
+                    int32x4_t qlp_coeff_1 = {qlp_coeff[4], qlp_coeff[5], qlp_coeff[6], 0};
+
+                    tmp_vec[0] = vld1q_s32(data - 7);
+                    tmp_vec[1] = vld1q_s32(data - 6);
+                    tmp_vec[2] = vld1q_s32(data - 5);
+                    
+                    for (i = 0; i < (int)data_len - 11; i += 12)
+                    {
+                        int32x4_t summ_0, summ_1, summ_2;
+                        tmp_vec[3] = vld1q_s32(data + i - 4);
+                        tmp_vec[4] = vld1q_s32(data + i - 3);
+                        tmp_vec[5] = vld1q_s32(data + i - 2);
+                        tmp_vec[6] = vld1q_s32(data + i - 1);
+                        tmp_vec[7] = vld1q_s32(data + i - 0);
+                        tmp_vec[8] = vld1q_s32(data + i + 1);
+                        tmp_vec[9] = vld1q_s32(data + i + 2);
+                        tmp_vec[10] = vld1q_s32(data + i + 3);
+                        tmp_vec[11] = vld1q_s32(data + i + 4);
+                        tmp_vec[12] = vld1q_s32(data + i + 5);
+                        tmp_vec[13] = vld1q_s32(data + i + 6);
+                        tmp_vec[14] = vld1q_s32(data + i + 7);
+                        
+                        MUL_32_BIT_LOOP_UNROOL_3(qlp_coeff_1, 2)
+                        MACC_32BIT_LOOP_UNROOL_3(1, qlp_coeff_1, 1)
+                        MACC_32BIT_LOOP_UNROOL_3(2, qlp_coeff_1, 0)
+                        MACC_32BIT_LOOP_UNROOL_3(3, qlp_coeff_0, 3)
+                        MACC_32BIT_LOOP_UNROOL_3(4, qlp_coeff_0, 2)
+                        MACC_32BIT_LOOP_UNROOL_3(5, qlp_coeff_0, 1)
+                        MACC_32BIT_LOOP_UNROOL_3(6, qlp_coeff_0, 0)
+                        
+                        vst1q_s32(residual+i + 0, vsubq_s32(vld1q_s32(data+i + 0) , vshlq_s32(summ_0,vdupq_n_s32(-lp_quantization))));
+                        vst1q_s32(residual+i + 4, vsubq_s32(vld1q_s32(data+i + 4) , vshlq_s32(summ_1,vdupq_n_s32(-lp_quantization))));
+                        vst1q_s32(residual+i + 8, vsubq_s32(vld1q_s32(data+i + 8) , vshlq_s32(summ_2,vdupq_n_s32(-lp_quantization))));
+
+                        tmp_vec[0] = tmp_vec[12];
+                        tmp_vec[1] = tmp_vec[13];
+                        tmp_vec[2] = tmp_vec[14];
+                    }
+                }
+            }
+            else {
+                if(order == 6) {
+                    int32x4_t qlp_coeff_0 = {qlp_coeff[0], qlp_coeff[1], qlp_coeff[2], qlp_coeff[3]};
+                    int32x4_t qlp_coeff_1 = {qlp_coeff[4], qlp_coeff[5], 0, 0};
+
+                    tmp_vec[0] = vld1q_s32(data - 6);
+                    tmp_vec[1] = vld1q_s32(data - 5);
+                    
+                    for (i = 0; i < (int)data_len - 11; i += 12)
+                    {
+                        int32x4_t summ_0, summ_1, summ_2;
+                        tmp_vec[2] = vld1q_s32(data + i - 4);
+                        tmp_vec[3] = vld1q_s32(data + i - 3);
+                        tmp_vec[4] = vld1q_s32(data + i - 2);
+                        tmp_vec[5] = vld1q_s32(data + i - 1);
+                        tmp_vec[6] = vld1q_s32(data + i - 0);
+                        tmp_vec[7] = vld1q_s32(data + i + 1);
+                        tmp_vec[8] = vld1q_s32(data + i + 2);
+                        tmp_vec[9] = vld1q_s32(data + i + 3);
+                        tmp_vec[10] = vld1q_s32(data + i + 4);
+                        tmp_vec[11] = vld1q_s32(data + i + 5);
+                        tmp_vec[12] = vld1q_s32(data + i + 6);
+                        tmp_vec[13] = vld1q_s32(data + i + 7);
+                        
+                        MUL_32_BIT_LOOP_UNROOL_3(qlp_coeff_1, 1)
+                        MACC_32BIT_LOOP_UNROOL_3(1, qlp_coeff_1, 0)
+                        MACC_32BIT_LOOP_UNROOL_3(2, qlp_coeff_0, 3)
+                        MACC_32BIT_LOOP_UNROOL_3(3, qlp_coeff_0, 2)
+                        MACC_32BIT_LOOP_UNROOL_3(4, qlp_coeff_0, 1)
+                        MACC_32BIT_LOOP_UNROOL_3(5, qlp_coeff_0, 0)
+                        
+                        vst1q_s32(residual+i + 0, vsubq_s32(vld1q_s32(data+i + 0) , vshlq_s32(summ_0,vdupq_n_s32(-lp_quantization))));
+                        vst1q_s32(residual+i + 4, vsubq_s32(vld1q_s32(data+i + 4) , vshlq_s32(summ_1,vdupq_n_s32(-lp_quantization))));
+                        vst1q_s32(residual+i + 8, vsubq_s32(vld1q_s32(data+i + 8) , vshlq_s32(summ_2,vdupq_n_s32(-lp_quantization))));
+
+                        tmp_vec[0] = tmp_vec[12];
+                        tmp_vec[1] = tmp_vec[13];
+                    }
+                }
+                else { /* order == 5 */
+                    int32x4_t qlp_coeff_0 = {qlp_coeff[0], qlp_coeff[1], qlp_coeff[2], qlp_coeff[3]};
+                    int32x4_t qlp_coeff_1 = {qlp_coeff[4], 0, 0, 0};
+
+                    tmp_vec[0] = vld1q_s32(data - 5);
+                    
+                    for (i = 0; i < (int)data_len - 11; i += 12)
+                    {
+                        int32x4_t summ_0, summ_1, summ_2;
+
+                        tmp_vec[1] = vld1q_s32(data + i - 4);
+                        tmp_vec[2] = vld1q_s32(data + i - 3);
+                        tmp_vec[3] = vld1q_s32(data + i - 2);
+                        tmp_vec[4] = vld1q_s32(data + i - 1);
+                        tmp_vec[5] = vld1q_s32(data + i - 0);
+                        tmp_vec[6] = vld1q_s32(data + i + 1);
+                        tmp_vec[7] = vld1q_s32(data + i + 2);
+                        tmp_vec[8] = vld1q_s32(data + i + 3);
+                        tmp_vec[9] = vld1q_s32(data + i + 4);
+                        tmp_vec[10] = vld1q_s32(data + i + 5);
+                        tmp_vec[11] = vld1q_s32(data + i + 6);
+                        tmp_vec[12] = vld1q_s32(data + i + 7);
+                        
+                        MUL_32_BIT_LOOP_UNROOL_3(qlp_coeff_1, 0)
+                        MACC_32BIT_LOOP_UNROOL_3(1, qlp_coeff_0, 3)
+                        MACC_32BIT_LOOP_UNROOL_3(2, qlp_coeff_0, 2)
+                        MACC_32BIT_LOOP_UNROOL_3(3, qlp_coeff_0, 1)
+                        MACC_32BIT_LOOP_UNROOL_3(4, qlp_coeff_0, 0)
+                        
+                        vst1q_s32(residual+i + 0, vsubq_s32(vld1q_s32(data+i + 0) , vshlq_s32(summ_0,vdupq_n_s32(-lp_quantization))));
+                        vst1q_s32(residual+i + 4, vsubq_s32(vld1q_s32(data+i + 4) , vshlq_s32(summ_1,vdupq_n_s32(-lp_quantization))));
+                        vst1q_s32(residual+i + 8, vsubq_s32(vld1q_s32(data+i + 8) , vshlq_s32(summ_2,vdupq_n_s32(-lp_quantization))));
+
+                        tmp_vec[0] = tmp_vec[12];
+                    }
+                }
+            }
+        }
+        else {
+            if(order > 2) {
+                if(order == 4) {
+                    int32x4_t qlp_coeff_0 = {qlp_coeff[0], qlp_coeff[1], qlp_coeff[2], qlp_coeff[3]};
+                    
+                    for (i = 0; i < (int)data_len - 11; i += 12)
+                    {
+                        int32x4_t summ_0, summ_1, summ_2;
+                        tmp_vec[0] = vld1q_s32(data + i - 4);
+                        tmp_vec[1] = vld1q_s32(data + i - 3);
+                        tmp_vec[2] = vld1q_s32(data + i - 2);
+                        tmp_vec[3] = vld1q_s32(data + i - 1);
+                        tmp_vec[4] = vld1q_s32(data + i - 0);
+                        tmp_vec[5] = vld1q_s32(data + i + 1);
+                        tmp_vec[6] = vld1q_s32(data + i + 2);
+                        tmp_vec[7] = vld1q_s32(data + i + 3);
+                        tmp_vec[8] = vld1q_s32(data + i + 4);
+                        tmp_vec[9] = vld1q_s32(data + i + 5);
+                        tmp_vec[10] = vld1q_s32(data + i + 6);
+                        tmp_vec[11] = vld1q_s32(data + i + 7);
+                    
+                        MUL_32_BIT_LOOP_UNROOL_3(qlp_coeff_0, 3)
+                        MACC_32BIT_LOOP_UNROOL_3(1, qlp_coeff_0, 2)
+                        MACC_32BIT_LOOP_UNROOL_3(2, qlp_coeff_0, 1)
+                        MACC_32BIT_LOOP_UNROOL_3(3, qlp_coeff_0, 0)
+                        
+                        vst1q_s32(residual+i + 0, vsubq_s32(vld1q_s32(data+i + 0) , vshlq_s32(summ_0,vdupq_n_s32(-lp_quantization))));
+                        vst1q_s32(residual+i + 4, vsubq_s32(vld1q_s32(data+i + 4) , vshlq_s32(summ_1,vdupq_n_s32(-lp_quantization))));
+                        vst1q_s32(residual+i + 8, vsubq_s32(vld1q_s32(data+i + 8) , vshlq_s32(summ_2,vdupq_n_s32(-lp_quantization))));
+                    }
+                }
+                else { /* order == 3 */
+                    int32x4_t qlp_coeff_0 = {qlp_coeff[0], qlp_coeff[1], qlp_coeff[2], 0};
+
+                    for (i = 0; i < (int)data_len - 11; i += 12)
+                    {
+                        int32x4_t summ_0, summ_1, summ_2;
+                        tmp_vec[0] = vld1q_s32(data + i - 3);
+                        tmp_vec[1] = vld1q_s32(data + i - 2);
+                        tmp_vec[2] = vld1q_s32(data + i - 1);
+                        tmp_vec[4] = vld1q_s32(data + i + 1);
+                        tmp_vec[5] = vld1q_s32(data + i + 2);
+                        tmp_vec[6] = vld1q_s32(data + i + 3);
+                        tmp_vec[8] = vld1q_s32(data + i + 5);
+                        tmp_vec[9] = vld1q_s32(data + i + 6);
+                        tmp_vec[10] = vld1q_s32(data + i + 7);
+                        
+                        MUL_32_BIT_LOOP_UNROOL_3(qlp_coeff_0, 2)
+                        MACC_32BIT_LOOP_UNROOL_3(1, qlp_coeff_0, 1)
+                        MACC_32BIT_LOOP_UNROOL_3(2, qlp_coeff_0, 0)
+
+                        vst1q_s32(residual+i + 0, vsubq_s32(vld1q_s32(data+i + 0) , vshlq_s32(summ_0,vdupq_n_s32(-lp_quantization))));
+                        vst1q_s32(residual+i + 4, vsubq_s32(vld1q_s32(data+i + 4) , vshlq_s32(summ_1,vdupq_n_s32(-lp_quantization))));
+                        vst1q_s32(residual+i + 8, vsubq_s32(vld1q_s32(data+i + 8) , vshlq_s32(summ_2,vdupq_n_s32(-lp_quantization))));
+                    }
+                }
+            }
+            else {
+                if(order == 2) {
+                    int32x4_t qlp_coeff_0 = {qlp_coeff[0], qlp_coeff[1], 0, 0};
+
+                    for (i = 0; i < (int)data_len - 11; i += 12)
+                    {
+                        int32x4_t summ_0, summ_1, summ_2;
+                        tmp_vec[0] = vld1q_s32(data + i - 2);
+                        tmp_vec[1] = vld1q_s32(data + i - 1);
+                        tmp_vec[4] = vld1q_s32(data + i + 2);
+                        tmp_vec[5] = vld1q_s32(data + i + 3);
+                        tmp_vec[8] = vld1q_s32(data + i + 6);
+                        tmp_vec[9] = vld1q_s32(data + i + 7);
+                        
+                        MUL_32_BIT_LOOP_UNROOL_3(qlp_coeff_0, 1)
+                        MACC_32BIT_LOOP_UNROOL_3(1, qlp_coeff_0, 0)
+                        
+                        vst1q_s32(residual+i + 0, vsubq_s32(vld1q_s32(data+i + 0) , vshlq_s32(summ_0,vdupq_n_s32(-lp_quantization))));
+                        vst1q_s32(residual+i + 4, vsubq_s32(vld1q_s32(data+i + 4) , vshlq_s32(summ_1,vdupq_n_s32(-lp_quantization))));
+                        vst1q_s32(residual+i + 8, vsubq_s32(vld1q_s32(data+i + 8) , vshlq_s32(summ_2,vdupq_n_s32(-lp_quantization))));
+                    }
+                }
+                else { /* order == 1 */
+                    int32x4_t qlp_coeff_0 = vdupq_n_s32(qlp_coeff[0]);
+
+                    for (i = 0; i < (int)data_len - 11; i += 12)
+                    {
+                        int32x4_t summ_0, summ_1, summ_2;
+                        tmp_vec[0] = vld1q_s32(data + i - 1);
+                        tmp_vec[4] = vld1q_s32(data + i + 3);
+                        tmp_vec[8] = vld1q_s32(data + i + 7);
+                        
+                        summ_0 = vmulq_s32(tmp_vec[0], qlp_coeff_0);
+                        summ_1 = vmulq_s32(tmp_vec[4], qlp_coeff_0);
+                        summ_2 = vmulq_s32(tmp_vec[8], qlp_coeff_0);
+
+                        vst1q_s32(residual+i + 0, vsubq_s32(vld1q_s32(data+i + 0) , vshlq_s32(summ_0,vdupq_n_s32(-lp_quantization))));
+                        vst1q_s32(residual+i + 4, vsubq_s32(vld1q_s32(data+i + 4) , vshlq_s32(summ_1,vdupq_n_s32(-lp_quantization))));
+                        vst1q_s32(residual+i + 8, vsubq_s32(vld1q_s32(data+i + 8) , vshlq_s32(summ_2,vdupq_n_s32(-lp_quantization))));
+                    }
+                }
+            }
+        }
+        for(; i < (int)data_len; i++) {
+            sum = 0;
+            switch(order) {
+                case 12: sum += qlp_coeff[11] * data[i-12]; /* Falls through. */
+                case 11: sum += qlp_coeff[10] * data[i-11]; /* Falls through. */
+                case 10: sum += qlp_coeff[ 9] * data[i-10]; /* Falls through. */
+                case 9:  sum += qlp_coeff[ 8] * data[i- 9]; /* Falls through. */
+                case 8:  sum += qlp_coeff[ 7] * data[i- 8]; /* Falls through. */
+                case 7:  sum += qlp_coeff[ 6] * data[i- 7]; /* Falls through. */
+                case 6:  sum += qlp_coeff[ 5] * data[i- 6]; /* Falls through. */
+                case 5:  sum += qlp_coeff[ 4] * data[i- 5]; /* Falls through. */
+                case 4:  sum += qlp_coeff[ 3] * data[i- 4]; /* Falls through. */
+                case 3:  sum += qlp_coeff[ 2] * data[i- 3]; /* Falls through. */
+                case 2:  sum += qlp_coeff[ 1] * data[i- 2]; /* Falls through. */
+                case 1:  sum += qlp_coeff[ 0] * data[i- 1];
+            }
+            residual[i] = data[i] - (sum >> lp_quantization);
+        }
+    }
+    else { /* order > 12 */
+        for(i = 0; i < (int)data_len; i++) {
+            sum = 0;
+            switch(order) {
+                case 32: sum += qlp_coeff[31] * data[i-32]; /* Falls through. */
+                case 31: sum += qlp_coeff[30] * data[i-31]; /* Falls through. */
+                case 30: sum += qlp_coeff[29] * data[i-30]; /* Falls through. */
+                case 29: sum += qlp_coeff[28] * data[i-29]; /* Falls through. */
+                case 28: sum += qlp_coeff[27] * data[i-28]; /* Falls through. */
+                case 27: sum += qlp_coeff[26] * data[i-27]; /* Falls through. */
+                case 26: sum += qlp_coeff[25] * data[i-26]; /* Falls through. */
+                case 25: sum += qlp_coeff[24] * data[i-25]; /* Falls through. */
+                case 24: sum += qlp_coeff[23] * data[i-24]; /* Falls through. */
+                case 23: sum += qlp_coeff[22] * data[i-23]; /* Falls through. */
+                case 22: sum += qlp_coeff[21] * data[i-22]; /* Falls through. */
+                case 21: sum += qlp_coeff[20] * data[i-21]; /* Falls through. */
+                case 20: sum += qlp_coeff[19] * data[i-20]; /* Falls through. */
+                case 19: sum += qlp_coeff[18] * data[i-19]; /* Falls through. */
+                case 18: sum += qlp_coeff[17] * data[i-18]; /* Falls through. */
+                case 17: sum += qlp_coeff[16] * data[i-17]; /* Falls through. */
+                case 16: sum += qlp_coeff[15] * data[i-16]; /* Falls through. */
+                case 15: sum += qlp_coeff[14] * data[i-15]; /* Falls through. */
+                case 14: sum += qlp_coeff[13] * data[i-14]; /* Falls through. */
+                case 13: sum += qlp_coeff[12] * data[i-13];
+                         sum += qlp_coeff[11] * data[i-12];
+                         sum += qlp_coeff[10] * data[i-11];
+                         sum += qlp_coeff[ 9] * data[i-10];
+                         sum += qlp_coeff[ 8] * data[i- 9];
+                         sum += qlp_coeff[ 7] * data[i- 8];
+                         sum += qlp_coeff[ 6] * data[i- 7];
+                         sum += qlp_coeff[ 5] * data[i- 6];
+                         sum += qlp_coeff[ 4] * data[i- 5];
+                         sum += qlp_coeff[ 3] * data[i- 4];
+                         sum += qlp_coeff[ 2] * data[i- 3];
+                         sum += qlp_coeff[ 1] * data[i- 2];
+                         sum += qlp_coeff[ 0] * data[i- 1];
+            }
+            residual[i] = data[i] - (sum >> lp_quantization);
+        }
+    }
+}
+
+
+
+#define MUL_64_BIT_LOOP_UNROOL_3(qlp_coeff_vec, lane) /* Starts the six 64-bit accumulators summ_l_0..2 / summ_h_0..2 (declared by the caller) with signed widening products of tmp_vec[0], tmp_vec[4] and tmp_vec[8] by lane `lane` of qlp_coeff_vec. Expands to bare statements that already end in ';', so call sites add no semicolon. */ \
+                        summ_l_0 = vmull_laneq_s32(vget_low_s32(tmp_vec[0]),qlp_coeff_vec, lane); /* low half of tmp_vec[0] */ \
+                        summ_h_0 = vmull_high_laneq_s32(tmp_vec[0], qlp_coeff_vec, lane); /* high half of tmp_vec[0] */\
+                        summ_l_1 = vmull_laneq_s32(vget_low_s32(tmp_vec[4]),qlp_coeff_vec, lane); /* low half of tmp_vec[4] */ \
+                        summ_h_1 = vmull_high_laneq_s32(tmp_vec[4], qlp_coeff_vec, lane); /* high half of tmp_vec[4] */\
+                        summ_l_2 = vmull_laneq_s32(vget_low_s32(tmp_vec[8]),qlp_coeff_vec, lane); /* low half of tmp_vec[8] */\
+                        summ_h_2 = vmull_high_laneq_s32(tmp_vec[8], qlp_coeff_vec, lane); /* high half of tmp_vec[8] */
+
+
+#define MACC_64_BIT_LOOP_UNROOL_3(tmp_vec_ind, qlp_coeff_vec, lane) /* Multiply-accumulates into the six 64-bit accumulators: tmp_vec[tmp_vec_ind], tmp_vec[tmp_vec_ind+4] and tmp_vec[tmp_vec_ind+8] are each multiplied by lane `lane` of qlp_coeff_vec and added to summ_*_0, summ_*_1 and summ_*_2 respectively. tmp_vec_ind is pasted unparenthesized into the index expressions, so pass a plain index (the visible call sites pass integer literals). */ \
+                        summ_l_0 = vmlal_laneq_s32(summ_l_0,vget_low_s32(tmp_vec[tmp_vec_ind]),qlp_coeff_vec, lane); /* low half, first group */ \
+                        summ_h_0 = vmlal_high_laneq_s32(summ_h_0, tmp_vec[tmp_vec_ind], qlp_coeff_vec, lane); /* high half, first group */ \
+                        summ_l_1 = vmlal_laneq_s32(summ_l_1, vget_low_s32(tmp_vec[tmp_vec_ind+4]),qlp_coeff_vec, lane); /* low half, second group */ \
+                        summ_h_1 = vmlal_high_laneq_s32(summ_h_1, tmp_vec[tmp_vec_ind+4], qlp_coeff_vec, lane); /* high half, second group */ \
+                        summ_l_2 = vmlal_laneq_s32(summ_l_2, vget_low_s32(tmp_vec[tmp_vec_ind+8]),qlp_coeff_vec, lane); /* low half, third group */\
+                        summ_h_2 = vmlal_high_laneq_s32(summ_h_2,tmp_vec[tmp_vec_ind+8], qlp_coeff_vec, lane); /* high half, third group */
+
+#define SHIFT_SUMS_64BITS_AND_STORE_SUB() /* Finishes one 12-sample step: shifts each 64-bit accumulator by lp_quantization_vec, packs the results into the 32-bit vectors res0..res2, then stores data[i..i+11] - res into residual[i..i+11]. Reads summ_*, lp_quantization_vec, res0..res2, data, residual and i from the calling scope. The wide NEON residual function fills lp_quantization_vec with -lp_quantization, and vshlq_s64 shifts right for negative counts. */ \
+                        res0 = vuzp1q_s32(vreinterpretq_s32_s64(vshlq_s64(summ_l_0,lp_quantization_vec)), vreinterpretq_s32_s64(vshlq_s64(summ_h_0,lp_quantization_vec))); /* vuzp1q_s32 keeps the even-indexed 32-bit elements, i.e. one word per shifted 64-bit sum (presumably the low word; assumes little-endian lane layout - confirm) */ \
+                        res1 = vuzp1q_s32(vreinterpretq_s32_s64(vshlq_s64(summ_l_1,lp_quantization_vec)), vreinterpretq_s32_s64(vshlq_s64(summ_h_1,lp_quantization_vec))); \
+                        res2 = vuzp1q_s32(vreinterpretq_s32_s64(vshlq_s64(summ_l_2,lp_quantization_vec)), vreinterpretq_s32_s64(vshlq_s64(summ_h_2,lp_quantization_vec))); \
+                        vst1q_s32(residual+i+0, vsubq_s32(vld1q_s32(data+i+0), res0)); /* samples i .. i+3 */\
+                        vst1q_s32(residual+i+4, vsubq_s32(vld1q_s32(data+i+4), res1)); /* samples i+4 .. i+7 */\
+                        vst1q_s32(residual+i+8, vsubq_s32(vld1q_s32(data+i+8), res2)); /* samples i+8 .. i+11 */
+
+void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_neon(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[]) {
+	int i;
+	FLAC__int64 sum;
+	
+    int32x4_t tmp_vec[20];
+    int32x4_t res0, res1, res2;
+    int64x2_t  lp_quantization_vec = vdupq_n_s64(-lp_quantization);
+
+    FLAC__ASSERT(order > 0);
+	FLAC__ASSERT(order <= 32);
+    
+    // Using prologue reads is valid as encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_64bit(signal+order,....)
+	if(order <= 12) {
+		if(order > 8) {
+			if(order > 10) {
+				if(order == 12) {
+                    int32x4_t qlp_coeff_0 = {qlp_coeff[0], qlp_coeff[1], qlp_coeff[2], qlp_coeff[3]};
+                    int32x4_t qlp_coeff_1 = {qlp_coeff[4],qlp_coeff[5],qlp_coeff[6],qlp_coeff[7]};
+                    int32x4_t qlp_coeff_2 = {qlp_coeff[8],qlp_coeff[9],qlp_coeff[10],qlp_coeff[11]};
+
+                    tmp_vec[0] = vld1q_s32(data - 12);
+                    tmp_vec[1] = vld1q_s32(data - 11);
+                    tmp_vec[2] = vld1q_s32(data - 10);
+                    tmp_vec[3] = vld1q_s32(data - 9);
+                    tmp_vec[4] = vld1q_s32(data - 8);
+                    tmp_vec[5] = vld1q_s32(data - 7);
+                    tmp_vec[6] = vld1q_s32(data - 6);
+                    tmp_vec[7] = vld1q_s32(data - 5);
+
+                    for (i = 0; i < (int)data_len - 11; i += 12)
+                    {
+                        int64x2_t  summ_l_0, summ_h_0, summ_l_1, summ_h_1, summ_l_2, summ_h_2;
+                        
+                        tmp_vec[8] = vld1q_s32(data+i-4);
+                        tmp_vec[9] = vld1q_s32(data+i-3);
+                        tmp_vec[10] = vld1q_s32(data+i-2);
+                        tmp_vec[11] = vld1q_s32(data+i-1);
+                        tmp_vec[12] = vld1q_s32(data+i);
+                        tmp_vec[13] = vld1q_s32(data+i+1);
+                        tmp_vec[14] = vld1q_s32(data+i+2);
+                        tmp_vec[15] = vld1q_s32(data+i+3);
+                        tmp_vec[16] = vld1q_s32(data + i + 4);
+                        tmp_vec[17] = vld1q_s32(data + i + 5);
+                        tmp_vec[18] = vld1q_s32(data + i + 6);
+                        tmp_vec[19] = vld1q_s32(data + i + 7);
+
+                        MUL_64_BIT_LOOP_UNROOL_3(qlp_coeff_2, 3)
+                        MACC_64_BIT_LOOP_UNROOL_3(1, qlp_coeff_2, 2) 
+                        MACC_64_BIT_LOOP_UNROOL_3(2, qlp_coeff_2, 1) 
+                        MACC_64_BIT_LOOP_UNROOL_3(3, qlp_coeff_2, 0) 
+                        MACC_64_BIT_LOOP_UNROOL_3(4, qlp_coeff_1, 3) 
+                        MACC_64_BIT_LOOP_UNROOL_3(5, qlp_coeff_1, 2) 
+                        MACC_64_BIT_LOOP_UNROOL_3(6, qlp_coeff_1, 1) 
+                        MACC_64_BIT_LOOP_UNROOL_3(7, qlp_coeff_1, 0) 
+                        MACC_64_BIT_LOOP_UNROOL_3(8, qlp_coeff_0, 3) 
+                        MACC_64_BIT_LOOP_UNROOL_3(9, qlp_coeff_0, 2) 
+                        MACC_64_BIT_LOOP_UNROOL_3(10,qlp_coeff_0, 1) 
+                        MACC_64_BIT_LOOP_UNROOL_3(11,qlp_coeff_0, 0) 
+
+                        SHIFT_SUMS_64BITS_AND_STORE_SUB()
+                        
+                        tmp_vec[0] = tmp_vec[12];
+                        tmp_vec[1] = tmp_vec[13];
+                        tmp_vec[2] = tmp_vec[14];
+                        tmp_vec[3] = tmp_vec[15];
+                        tmp_vec[4] = tmp_vec[16];
+                        tmp_vec[5] = tmp_vec[17];
+                        tmp_vec[6] = tmp_vec[18];
+                        tmp_vec[7] = tmp_vec[19];
+                    }
+                }
+				else { /* order == 11 */			
+                    int32x4_t qlp_coeff_0 = {qlp_coeff[0], qlp_coeff[1], qlp_coeff[2], qlp_coeff[3]};
+                    int32x4_t qlp_coeff_1 = {qlp_coeff[4],qlp_coeff[5],qlp_coeff[6],qlp_coeff[7]};
+                    int32x4_t qlp_coeff_2 = {qlp_coeff[8],qlp_coeff[9],qlp_coeff[10],0};
+
+                    tmp_vec[0] = vld1q_s32(data - 11);
+                    tmp_vec[1] = vld1q_s32(data - 10);
+                    tmp_vec[2] = vld1q_s32(data - 9);
+                    tmp_vec[3] = vld1q_s32(data - 8);
+                    tmp_vec[4] = vld1q_s32(data - 7);
+                    tmp_vec[5] = vld1q_s32(data - 6);
+                    tmp_vec[6] = vld1q_s32(data - 5);
+
+                    for (i = 0; i < (int)data_len - 11; i += 12)
+                    {
+                        int64x2_t  summ_l_0, summ_h_0, summ_l_1, summ_h_1, summ_l_2, summ_h_2;
+                        
+                        tmp_vec[7] = vld1q_s32(data+i-4);
+                        tmp_vec[8] = vld1q_s32(data+i-3);
+                        tmp_vec[9] = vld1q_s32(data+i-2);
+                        tmp_vec[10] = vld1q_s32(data+i-1);
+                        tmp_vec[11] = vld1q_s32(data+i);
+                        tmp_vec[12] = vld1q_s32(data+i+1);
+                        tmp_vec[13] = vld1q_s32(data+i+2);
+                        tmp_vec[14] = vld1q_s32(data+i+3);
+                        tmp_vec[15] = vld1q_s32(data + i + 4);
+                        tmp_vec[16] = vld1q_s32(data + i + 5);
+                        tmp_vec[17] = vld1q_s32(data + i + 6);
+                        tmp_vec[18] = vld1q_s32(data + i + 7);
+
+                        MUL_64_BIT_LOOP_UNROOL_3(qlp_coeff_2, 2)
+                        MACC_64_BIT_LOOP_UNROOL_3(1, qlp_coeff_2, 1) 
+                        MACC_64_BIT_LOOP_UNROOL_3(2, qlp_coeff_2, 0) 
+                        MACC_64_BIT_LOOP_UNROOL_3(3, qlp_coeff_1, 3) 
+                        MACC_64_BIT_LOOP_UNROOL_3(4, qlp_coeff_1, 2) 
+                        MACC_64_BIT_LOOP_UNROOL_3(5, qlp_coeff_1, 1) 
+                        MACC_64_BIT_LOOP_UNROOL_3(6, qlp_coeff_1, 0) 
+                        MACC_64_BIT_LOOP_UNROOL_3(7, qlp_coeff_0, 3) 
+                        MACC_64_BIT_LOOP_UNROOL_3(8, qlp_coeff_0, 2) 
+                        MACC_64_BIT_LOOP_UNROOL_3(9, qlp_coeff_0, 1) 
+                        MACC_64_BIT_LOOP_UNROOL_3(10,qlp_coeff_0, 0) 
+
+                        SHIFT_SUMS_64BITS_AND_STORE_SUB()
+                        
+                        tmp_vec[0] = tmp_vec[12];
+                        tmp_vec[1] = tmp_vec[13];
+                        tmp_vec[2] = tmp_vec[14];
+                        tmp_vec[3] = tmp_vec[15];
+                        tmp_vec[4] = tmp_vec[16];
+                        tmp_vec[5] = tmp_vec[17];
+                        tmp_vec[6] = tmp_vec[18];
+                    }
+                }
+            }
+            else
+            {
+                if (order == 10) {
+                    int32x4_t qlp_coeff_0 = {qlp_coeff[0], qlp_coeff[1], qlp_coeff[2], qlp_coeff[3]};
+                    int32x4_t qlp_coeff_1 = {qlp_coeff[4], qlp_coeff[5], qlp_coeff[6], qlp_coeff[7]};
+                    int32x4_t qlp_coeff_2 = {qlp_coeff[8], qlp_coeff[9], 0, 0};
+
+                    tmp_vec[0] = vld1q_s32(data - 10);
+                    tmp_vec[1] = vld1q_s32(data - 9);
+                    tmp_vec[2] = vld1q_s32(data - 8);
+                    tmp_vec[3] = vld1q_s32(data - 7);
+                    tmp_vec[4] = vld1q_s32(data - 6);
+                    tmp_vec[5] = vld1q_s32(data - 5);
+                    
+
+                    for (i = 0; i < (int)data_len - 11; i += 12)
+                    {
+                        int64x2_t summ_l_0, summ_h_0, summ_l_1, summ_h_1, summ_l_2, summ_h_2;
+                        
+                        tmp_vec[6] = vld1q_s32(data + i - 4);
+                        tmp_vec[7] = vld1q_s32(data + i - 3);
+                        tmp_vec[8] = vld1q_s32(data + i - 2);
+                        tmp_vec[9] = vld1q_s32(data + i - 1);
+                        tmp_vec[10] = vld1q_s32(data + i - 0);
+                        tmp_vec[11] = vld1q_s32(data + i + 1);
+                        tmp_vec[12] = vld1q_s32(data + i + 2);
+                        tmp_vec[13] = vld1q_s32(data + i + 3);
+                        tmp_vec[14] = vld1q_s32(data + i + 4);
+                        tmp_vec[15] = vld1q_s32(data + i + 5);
+                        tmp_vec[16] = vld1q_s32(data + i + 6);
+                        tmp_vec[17] = vld1q_s32(data + i + 7);
+                        
+                        MUL_64_BIT_LOOP_UNROOL_3(qlp_coeff_2, 1)
+                        MACC_64_BIT_LOOP_UNROOL_3(1, qlp_coeff_2, 0) 
+                        MACC_64_BIT_LOOP_UNROOL_3(2, qlp_coeff_1, 3) 
+                        MACC_64_BIT_LOOP_UNROOL_3(3, qlp_coeff_1, 2) 
+                        MACC_64_BIT_LOOP_UNROOL_3(4, qlp_coeff_1, 1) 
+                        MACC_64_BIT_LOOP_UNROOL_3(5, qlp_coeff_1, 0) 
+                        MACC_64_BIT_LOOP_UNROOL_3(6, qlp_coeff_0, 3) 
+                        MACC_64_BIT_LOOP_UNROOL_3(7, qlp_coeff_0, 2) 
+                        MACC_64_BIT_LOOP_UNROOL_3(8, qlp_coeff_0, 1) 
+                        MACC_64_BIT_LOOP_UNROOL_3(9, qlp_coeff_0, 0) 
+
+                        SHIFT_SUMS_64BITS_AND_STORE_SUB()
+                        
+                        tmp_vec[0] = tmp_vec[12];
+                        tmp_vec[1] = tmp_vec[13];
+                        tmp_vec[2] = tmp_vec[14];
+                        tmp_vec[3] = tmp_vec[15];
+                        tmp_vec[4] = tmp_vec[16];
+                        tmp_vec[5] = tmp_vec[17];
+                    }
+                }
+
+                else /* order == 9 */ {
+                    int32x4_t qlp_coeff_0 = {qlp_coeff[0], qlp_coeff[1], qlp_coeff[2], qlp_coeff[3]};
+                    int32x4_t qlp_coeff_1 = {qlp_coeff[4], qlp_coeff[5], qlp_coeff[6], qlp_coeff[7]};
+                    int32x4_t qlp_coeff_2 = {qlp_coeff[8], 0, 0, 0};
+
+                    tmp_vec[0] = vld1q_s32(data - 9);
+                    tmp_vec[1] = vld1q_s32(data - 8);
+                    tmp_vec[2] = vld1q_s32(data - 7);
+                    tmp_vec[3] = vld1q_s32(data - 6);
+                    tmp_vec[4] = vld1q_s32(data - 5);
+
+                    for (i = 0; i < (int)data_len - 11; i += 12)
+                    {
+                        int64x2_t summ_l_0, summ_h_0, summ_l_1, summ_h_1, summ_l_2, summ_h_2;
+
+                        tmp_vec[5] = vld1q_s32(data + i - 4);
+                        tmp_vec[6] = vld1q_s32(data + i - 3);
+                        tmp_vec[7] = vld1q_s32(data + i - 2);
+                        tmp_vec[8] = vld1q_s32(data + i - 1);
+                        tmp_vec[9] = vld1q_s32(data + i - 0);
+                        tmp_vec[10] = vld1q_s32(data + i + 1);
+                        tmp_vec[11] = vld1q_s32(data + i + 2);
+                        tmp_vec[12] = vld1q_s32(data + i + 3);
+                        tmp_vec[13] = vld1q_s32(data + i + 4);
+                        tmp_vec[14] = vld1q_s32(data + i + 5);
+                        tmp_vec[15] = vld1q_s32(data + i + 6);
+                        tmp_vec[16] = vld1q_s32(data + i + 7);
+
+                        MUL_64_BIT_LOOP_UNROOL_3(qlp_coeff_2, 0)
+                        MACC_64_BIT_LOOP_UNROOL_3(1, qlp_coeff_1, 3) 
+                        MACC_64_BIT_LOOP_UNROOL_3(2, qlp_coeff_1, 2) 
+                        MACC_64_BIT_LOOP_UNROOL_3(3, qlp_coeff_1, 1) 
+                        MACC_64_BIT_LOOP_UNROOL_3(4, qlp_coeff_1, 0) 
+                        MACC_64_BIT_LOOP_UNROOL_3(5, qlp_coeff_0, 3) 
+                        MACC_64_BIT_LOOP_UNROOL_3(6, qlp_coeff_0, 2) 
+                        MACC_64_BIT_LOOP_UNROOL_3(7, qlp_coeff_0, 1) 
+                        MACC_64_BIT_LOOP_UNROOL_3(8, qlp_coeff_0, 0) 
+
+                        SHIFT_SUMS_64BITS_AND_STORE_SUB()
+                        
+                        tmp_vec[0] = tmp_vec[12];
+                        tmp_vec[1] = tmp_vec[13];
+                        tmp_vec[2] = tmp_vec[14];
+                        tmp_vec[3] = tmp_vec[15];
+                        tmp_vec[4] = tmp_vec[16];
+                    }
+                }
+            }
+        }
+        else if (order > 4)
+        {
+            if (order > 6)
+            {
+                if (order == 8)
+                {
+                    int32x4_t qlp_coeff_0 = {qlp_coeff[0], qlp_coeff[1], qlp_coeff[2], qlp_coeff[3]};
+                    int32x4_t qlp_coeff_1 = {qlp_coeff[4], qlp_coeff[5], qlp_coeff[6], qlp_coeff[7]};
+                 
+                    tmp_vec[0] = vld1q_s32(data - 8);
+                    tmp_vec[1] = vld1q_s32(data - 7);
+                    tmp_vec[2] = vld1q_s32(data - 6);
+                    tmp_vec[3] = vld1q_s32(data - 5);
+
+                    for (i = 0; i < (int)data_len - 11; i += 12)
+                    {
+                        int64x2_t summ_l_0, summ_h_0, summ_l_1, summ_h_1, summ_l_2, summ_h_2;
+
+                        tmp_vec[4] = vld1q_s32(data + i - 4);
+                        tmp_vec[5] = vld1q_s32(data + i - 3);
+                        tmp_vec[6] = vld1q_s32(data + i - 2);
+                        tmp_vec[7] = vld1q_s32(data + i - 1);
+                        tmp_vec[8] = vld1q_s32(data + i - 0);
+                        tmp_vec[9] = vld1q_s32(data + i + 1);
+                        tmp_vec[10] = vld1q_s32(data + i + 2);
+                        tmp_vec[11] = vld1q_s32(data + i + 3);
+                        tmp_vec[12] = vld1q_s32(data + i + 4);
+                        tmp_vec[13] = vld1q_s32(data + i + 5);
+                        tmp_vec[14] = vld1q_s32(data + i + 6);
+                        tmp_vec[15] = vld1q_s32(data + i + 7);
+                        
+                      
+                        MUL_64_BIT_LOOP_UNROOL_3(qlp_coeff_1, 3)
+                        MACC_64_BIT_LOOP_UNROOL_3(1, qlp_coeff_1, 2) 
+                        MACC_64_BIT_LOOP_UNROOL_3(2, qlp_coeff_1, 1) 
+                        MACC_64_BIT_LOOP_UNROOL_3(3, qlp_coeff_1, 0) 
+                        MACC_64_BIT_LOOP_UNROOL_3(4, qlp_coeff_0, 3) 
+                        MACC_64_BIT_LOOP_UNROOL_3(5, qlp_coeff_0, 2) 
+                        MACC_64_BIT_LOOP_UNROOL_3(6, qlp_coeff_0, 1) 
+                        MACC_64_BIT_LOOP_UNROOL_3(7, qlp_coeff_0, 0) 
+
+                        SHIFT_SUMS_64BITS_AND_STORE_SUB()
+                        
+                        tmp_vec[0] = tmp_vec[12];
+                        tmp_vec[1] = tmp_vec[13];
+                        tmp_vec[2] = tmp_vec[14];
+                        tmp_vec[3] = tmp_vec[15];
+                    }
+                }
+                else /* order == 7 */
+                {
+                    int32x4_t qlp_coeff_0 = {qlp_coeff[0], qlp_coeff[1], qlp_coeff[2], qlp_coeff[3]};
+                    int32x4_t qlp_coeff_1 = {qlp_coeff[4], qlp_coeff[5], qlp_coeff[6], 0};
+
+                    tmp_vec[0] = vld1q_s32(data - 7);
+                    tmp_vec[1] = vld1q_s32(data - 6);
+                    tmp_vec[2] = vld1q_s32(data - 5);
+                    
+
+                    for (i = 0; i < (int)data_len - 11; i += 12)
+                    {
+                        int64x2_t summ_l_0, summ_h_0, summ_l_1, summ_h_1, summ_l_2, summ_h_2;
+                        tmp_vec[3] = vld1q_s32(data +i - 4);
+                        tmp_vec[4] = vld1q_s32(data + i - 3);
+                        tmp_vec[5] = vld1q_s32(data + i - 2);
+                        tmp_vec[6] = vld1q_s32(data + i - 1);
+                        tmp_vec[7] = vld1q_s32(data + i - 0);
+                        tmp_vec[8] = vld1q_s32(data + i + 1);
+                        tmp_vec[9] = vld1q_s32(data + i + 2);
+                        tmp_vec[10] = vld1q_s32(data + i + 3);
+                        tmp_vec[11] = vld1q_s32(data + i + 4);
+                        tmp_vec[12] = vld1q_s32(data + i + 5);
+                        tmp_vec[13] = vld1q_s32(data + i + 6);
+                        tmp_vec[14] = vld1q_s32(data + i + 7);
+                                              
+                      
+                        MUL_64_BIT_LOOP_UNROOL_3(qlp_coeff_1, 2)
+                        MACC_64_BIT_LOOP_UNROOL_3(1, qlp_coeff_1, 1) 
+                        MACC_64_BIT_LOOP_UNROOL_3(2, qlp_coeff_1, 0) 
+                        MACC_64_BIT_LOOP_UNROOL_3(3, qlp_coeff_0, 3) 
+                        MACC_64_BIT_LOOP_UNROOL_3(4, qlp_coeff_0, 2) 
+                        MACC_64_BIT_LOOP_UNROOL_3(5, qlp_coeff_0, 1) 
+                        MACC_64_BIT_LOOP_UNROOL_3(6, qlp_coeff_0, 0) 
+
+                        SHIFT_SUMS_64BITS_AND_STORE_SUB()
+                        
+                        tmp_vec[0] = tmp_vec[12];
+                        tmp_vec[1] = tmp_vec[13];
+                        tmp_vec[2] = tmp_vec[14];
+                    }
+                }
+            }
+            else
+            {
+                if (order == 6) {
+                    int32x4_t qlp_coeff_0 = {qlp_coeff[0], qlp_coeff[1], qlp_coeff[2], qlp_coeff[3]};
+                    int32x4_t qlp_coeff_1 = {qlp_coeff[4], qlp_coeff[5], 0, 0};
+
+                    tmp_vec[0] = vld1q_s32(data - 6);
+                    tmp_vec[1] = vld1q_s32(data - 5);
+                    
+                    for (i = 0; i < (int)data_len - 11; i += 12)
+                    {
+                        int64x2_t summ_l_0, summ_h_0, summ_l_1, summ_h_1, summ_l_2, summ_h_2;
+
+                        tmp_vec[2] = vld1q_s32(data + i - 4);
+                        tmp_vec[3] = vld1q_s32(data + i - 3);
+                        tmp_vec[4] = vld1q_s32(data + i - 2);
+                        tmp_vec[5] = vld1q_s32(data + i - 1);
+                        tmp_vec[6] = vld1q_s32(data + i - 0);
+                        tmp_vec[7] = vld1q_s32(data + i + 1);
+                        tmp_vec[8] = vld1q_s32(data + i + 2);
+                        tmp_vec[9] = vld1q_s32(data + i + 3);
+                        tmp_vec[10] = vld1q_s32(data + i + 4);
+                        tmp_vec[11] = vld1q_s32(data + i + 5);
+                        tmp_vec[12] = vld1q_s32(data + i + 6);
+                        tmp_vec[13] = vld1q_s32(data + i + 7);
+                        
+                       
+                        MUL_64_BIT_LOOP_UNROOL_3(qlp_coeff_1, 1)
+                        MACC_64_BIT_LOOP_UNROOL_3(1, qlp_coeff_1, 0) 
+                        MACC_64_BIT_LOOP_UNROOL_3(2, qlp_coeff_0, 3) 
+                        MACC_64_BIT_LOOP_UNROOL_3(3, qlp_coeff_0, 2) 
+                        MACC_64_BIT_LOOP_UNROOL_3(4, qlp_coeff_0, 1) 
+                        MACC_64_BIT_LOOP_UNROOL_3(5, qlp_coeff_0, 0) 
+                        
+                        SHIFT_SUMS_64BITS_AND_STORE_SUB()
+                        
+                        tmp_vec[0] = tmp_vec[12];
+                        tmp_vec[1] = tmp_vec[13];
+                    }
+                }
+
+                else
+                { /* order == 5 */
+                    int32x4_t qlp_coeff_0 = {qlp_coeff[0], qlp_coeff[1], qlp_coeff[2], qlp_coeff[3]};
+                    int32x4_t qlp_coeff_1 = {qlp_coeff[4], 0, 0, 0};
+
+                    tmp_vec[0] = vld1q_s32(data - 5);
+                    
+                    for (i = 0; i < (int)data_len - 11; i += 12)
+                    {
+                        int64x2_t summ_l_0, summ_h_0, summ_l_1, summ_h_1, summ_l_2, summ_h_2;
+                        tmp_vec[1] = vld1q_s32(data + i - 4);
+                        tmp_vec[2] = vld1q_s32(data + i - 3);
+                        tmp_vec[3] = vld1q_s32(data + i - 2);
+                        tmp_vec[4] = vld1q_s32(data + i - 1);
+                        tmp_vec[5] = vld1q_s32(data + i - 0);
+                        tmp_vec[6] = vld1q_s32(data + i + 1);
+                        tmp_vec[7] = vld1q_s32(data + i + 2);
+                        tmp_vec[8] = vld1q_s32(data + i + 3);
+                        tmp_vec[9] = vld1q_s32(data + i + 4);
+                        tmp_vec[10] = vld1q_s32(data + i + 5);
+                        tmp_vec[11] = vld1q_s32(data + i + 6);
+                        tmp_vec[12] = vld1q_s32(data + i + 7);
+                        
+                        MUL_64_BIT_LOOP_UNROOL_3(qlp_coeff_1, 0)
+                        MACC_64_BIT_LOOP_UNROOL_3(1, qlp_coeff_0, 3) 
+                        MACC_64_BIT_LOOP_UNROOL_3(2, qlp_coeff_0, 2) 
+                        MACC_64_BIT_LOOP_UNROOL_3(3, qlp_coeff_0, 1) 
+                        MACC_64_BIT_LOOP_UNROOL_3(4, qlp_coeff_0, 0) 
+                        
+                        SHIFT_SUMS_64BITS_AND_STORE_SUB()
+                        
+                        tmp_vec[0] = tmp_vec[12];
+                    }
+                }
+            }
+        }
+        else
+        {
+            if (order > 2)
+            {
+                if (order == 4)
+                {
+                    int32x4_t qlp_coeff_0 = {qlp_coeff[0], qlp_coeff[1], qlp_coeff[2], qlp_coeff[3]};
+                    
+                    for (i = 0; i < (int)data_len - 11; i += 12)
+                    {
+                        int64x2_t summ_l_0, summ_h_0, summ_l_1, summ_h_1, summ_l_2, summ_h_2;
+                        tmp_vec[0] = vld1q_s32(data + i - 4);
+                        tmp_vec[1] = vld1q_s32(data + i - 3);
+                        tmp_vec[2] = vld1q_s32(data + i - 2);
+                        tmp_vec[3] = vld1q_s32(data + i - 1);
+                        tmp_vec[4] = vld1q_s32(data + i - 0);
+                        tmp_vec[5] = vld1q_s32(data + i + 1);
+                        tmp_vec[6] = vld1q_s32(data + i + 2);
+                        tmp_vec[7] = vld1q_s32(data + i + 3);
+                        tmp_vec[8] = vld1q_s32(data + i + 4);
+                        tmp_vec[9] = vld1q_s32(data + i + 5);
+                        tmp_vec[10] = vld1q_s32(data + i + 6);
+                        tmp_vec[11] = vld1q_s32(data + i + 7);
+                        
+                        MUL_64_BIT_LOOP_UNROOL_3(qlp_coeff_0, 3)
+                        MACC_64_BIT_LOOP_UNROOL_3(1, qlp_coeff_0, 2) 
+                        MACC_64_BIT_LOOP_UNROOL_3(2, qlp_coeff_0, 1) 
+                        MACC_64_BIT_LOOP_UNROOL_3(3, qlp_coeff_0, 0) 
+                                               
+                        SHIFT_SUMS_64BITS_AND_STORE_SUB()                        
+                    }
+                }
+                else
+                { /* order == 3 */
+
+                    int32x4_t qlp_coeff_0 = {qlp_coeff[0], qlp_coeff[1], qlp_coeff[2], 0};
+                    
+                    for (i = 0; i < (int)data_len - 11; i += 12)
+                    {
+                        int64x2_t summ_l_0, summ_h_0, summ_l_1, summ_h_1, summ_l_2, summ_h_2;
+                        tmp_vec[0] = vld1q_s32(data + i - 3);
+                        tmp_vec[1] = vld1q_s32(data + i - 2);
+                        tmp_vec[2] = vld1q_s32(data + i - 1);
+                        tmp_vec[4] = vld1q_s32(data + i + 1);
+                        tmp_vec[5] = vld1q_s32(data + i + 2);
+                        tmp_vec[6] = vld1q_s32(data + i + 3);
+                        tmp_vec[8] = vld1q_s32(data + i + 5);
+                        tmp_vec[9] = vld1q_s32(data + i + 6);
+                        tmp_vec[10] = vld1q_s32(data + i + 7);
+                        
+                        MUL_64_BIT_LOOP_UNROOL_3(qlp_coeff_0, 2)
+                        MACC_64_BIT_LOOP_UNROOL_3(1, qlp_coeff_0, 1) 
+                        MACC_64_BIT_LOOP_UNROOL_3(2, qlp_coeff_0, 0) 
+                        
+                        SHIFT_SUMS_64BITS_AND_STORE_SUB()                        
+                    }
+                }
+            }
+            else
+            {
+                if (order == 2)
+                {
+                    int32x4_t qlp_coeff_0 = {qlp_coeff[0], qlp_coeff[1], 0, 0};
+
+                    for (i = 0; i < (int)data_len - 11; i += 12)
+                    {
+                        int64x2_t summ_l_0, summ_h_0, summ_l_1, summ_h_1, summ_l_2, summ_h_2;
+                        tmp_vec[0] = vld1q_s32(data + i - 2);
+                        tmp_vec[1] = vld1q_s32(data + i - 1);
+                        tmp_vec[4] = vld1q_s32(data + i + 2);
+                        tmp_vec[5] = vld1q_s32(data + i + 3);
+                        tmp_vec[8] = vld1q_s32(data + i + 6);
+                        tmp_vec[9] = vld1q_s32(data + i + 7);
+                        
+                        MUL_64_BIT_LOOP_UNROOL_3(qlp_coeff_0, 1)
+                        MACC_64_BIT_LOOP_UNROOL_3(1, qlp_coeff_0, 0) 
+
+                        SHIFT_SUMS_64BITS_AND_STORE_SUB()                        
+                    }
+                }
+
+                else
+                { /* order == 1 */
+
+                    int32x2_t qlp_coeff_0_2 = vdup_n_s32(qlp_coeff[0]);
+                    int32x4_t qlp_coeff_0_4 = vdupq_n_s32(qlp_coeff[0]);
+
+                    for (i = 0; i < (int)data_len - 11; i += 12)
+                    {
+                        int64x2_t summ_l_0, summ_h_0, summ_l_1, summ_h_1, summ_l_2, summ_h_2;
+                        tmp_vec[0] = vld1q_s32(data + i - 1);
+                        tmp_vec[4] = vld1q_s32(data + i + 3);
+                        tmp_vec[8] = vld1q_s32(data + i + 7);
+                        
+                        summ_l_0 = vmull_s32(vget_low_s32(tmp_vec[0]), qlp_coeff_0_2);
+                        summ_h_0 = vmull_high_s32(tmp_vec[0], qlp_coeff_0_4);
+
+                        summ_l_1 = vmull_s32(vget_low_s32(tmp_vec[4]), qlp_coeff_0_2);
+                        summ_h_1 = vmull_high_s32(tmp_vec[4], qlp_coeff_0_4);
+
+                        summ_l_2 = vmull_s32(vget_low_s32(tmp_vec[8]), qlp_coeff_0_2);
+                        summ_h_2 = vmull_high_s32(tmp_vec[8], qlp_coeff_0_4);
+
+                        SHIFT_SUMS_64BITS_AND_STORE_SUB()                        
+                    }
+                }
+            }
+        }
+        for (; i < (int)data_len; i++)
+        {
+            sum = 0;
+            switch (order)
+            {
+            case 12:
+                sum += qlp_coeff[11] * (FLAC__int64)data[i - 12]; /* Falls through. */
+            case 11:
+                sum += qlp_coeff[10] * (FLAC__int64)data[i - 11]; /* Falls through. */
+            case 10:
+                sum += qlp_coeff[9] * (FLAC__int64)data[i - 10]; /* Falls through. */
+            case 9:
+                sum += qlp_coeff[8] * (FLAC__int64)data[i - 9]; /* Falls through. */
+            case 8:
+                sum += qlp_coeff[7] * (FLAC__int64)data[i - 8]; /* Falls through. */
+            case 7:
+                sum += qlp_coeff[6] * (FLAC__int64)data[i - 7]; /* Falls through. */
+            case 6:
+                sum += qlp_coeff[5] * (FLAC__int64)data[i - 6]; /* Falls through. */
+            case 5:
+                sum += qlp_coeff[4] * (FLAC__int64)data[i - 5]; /* Falls through. */
+            case 4:
+                sum += qlp_coeff[3] * (FLAC__int64)data[i - 4]; /* Falls through. */
+            case 3:
+                sum += qlp_coeff[2] * (FLAC__int64)data[i - 3]; /* Falls through. */
+            case 2:
+                sum += qlp_coeff[1] * (FLAC__int64)data[i - 2]; /* Falls through. */
+            case 1:
+                sum += qlp_coeff[0] * (FLAC__int64)data[i - 1];
+            }
+            residual[i] = data[i] - (sum >> lp_quantization);
+        }
+    }
+    else
+    { /* order > 12 */
+        for (i = 0; i < (int)data_len; i++)
+        {
+            sum = 0;
+            switch (order)
+            {
+            case 32:
+                sum += qlp_coeff[31] * (FLAC__int64)data[i - 32]; /* Falls through. */
+            case 31:
+                sum += qlp_coeff[30] * (FLAC__int64)data[i - 31]; /* Falls through. */
+            case 30:
+                sum += qlp_coeff[29] * (FLAC__int64)data[i - 30]; /* Falls through. */
+            case 29:
+                sum += qlp_coeff[28] * (FLAC__int64)data[i - 29]; /* Falls through. */
+            case 28:
+                sum += qlp_coeff[27] * (FLAC__int64)data[i - 28]; /* Falls through. */
+            case 27:
+                sum += qlp_coeff[26] * (FLAC__int64)data[i - 27]; /* Falls through. */
+            case 26:
+                sum += qlp_coeff[25] * (FLAC__int64)data[i - 26]; /* Falls through. */
+            case 25:
+                sum += qlp_coeff[24] * (FLAC__int64)data[i - 25]; /* Falls through. */
+            case 24:
+                sum += qlp_coeff[23] * (FLAC__int64)data[i - 24]; /* Falls through. */
+            case 23:
+                sum += qlp_coeff[22] * (FLAC__int64)data[i - 23]; /* Falls through. */
+            case 22:
+                sum += qlp_coeff[21] * (FLAC__int64)data[i - 22]; /* Falls through. */
+            case 21:
+                sum += qlp_coeff[20] * (FLAC__int64)data[i - 21]; /* Falls through. */
+            case 20:
+                sum += qlp_coeff[19] * (FLAC__int64)data[i - 20]; /* Falls through. */
+            case 19:
+                sum += qlp_coeff[18] * (FLAC__int64)data[i - 19]; /* Falls through. */
+            case 18:
+                sum += qlp_coeff[17] * (FLAC__int64)data[i - 18]; /* Falls through. */
+            case 17:
+                sum += qlp_coeff[16] * (FLAC__int64)data[i - 17]; /* Falls through. */
+            case 16:
+                sum += qlp_coeff[15] * (FLAC__int64)data[i - 16]; /* Falls through. */
+            case 15:
+                sum += qlp_coeff[14] * (FLAC__int64)data[i - 15]; /* Falls through. */
+            case 14:
+                sum += qlp_coeff[13] * (FLAC__int64)data[i - 14]; /* Falls through. */
+            case 13:
+                sum += qlp_coeff[12] * (FLAC__int64)data[i - 13];
+                sum += qlp_coeff[11] * (FLAC__int64)data[i - 12];
+                sum += qlp_coeff[10] * (FLAC__int64)data[i - 11];
+                sum += qlp_coeff[9] * (FLAC__int64)data[i - 10];	
+                sum += qlp_coeff[8] * (FLAC__int64)data[i - 9];
+                sum += qlp_coeff[7] * (FLAC__int64)data[i - 8];
+                sum += qlp_coeff[6] * (FLAC__int64)data[i - 7];
+                sum += qlp_coeff[5] * (FLAC__int64)data[i - 6];
+                sum += qlp_coeff[4] * (FLAC__int64)data[i - 5];
+                sum += qlp_coeff[3] * (FLAC__int64)data[i - 4];
+                sum += qlp_coeff[2] * (FLAC__int64)data[i - 3];
+                sum += qlp_coeff[1] * (FLAC__int64)data[i - 2];
+                sum += qlp_coeff[0] * (FLAC__int64)data[i - 1];
+            }
+            residual[i] = data[i] - (sum >> lp_quantization);
+        }
+    }
+
+    return;
+}
+
+#endif /* FLAC__CPU_ARM64 && FLAC__HAS_ARCH64INTRIN */
+#endif /* FLAC__NO_ASM */
+#endif /* FLAC__INTEGER_ONLY_LIBRARY */

diff --git a/src/libFLAC/lpc_intrin_sse.c b/src/libFLAC/lpc_intrin_sse.c
deleted file mode 100644
index 81bf586..0000000
--- a/src/libFLAC/lpc_intrin_sse.c
+++ /dev/null

@@ -1,451 +0,0 @@
-/* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * - Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- *
- * - Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * - Neither the name of the Xiph.org Foundation nor the names of its
- * contributors may be used to endorse or promote products derived from
- * this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
- * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
- * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
- * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- */
-
-#ifdef HAVE_CONFIG_H
-#  include <config.h>
-#endif
-
-#ifndef FLAC__INTEGER_ONLY_LIBRARY
-#ifndef FLAC__NO_ASM
-#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
-#include "private/lpc.h"
-#ifdef FLAC__SSE_SUPPORTED
-
-#include "FLAC/assert.h"
-#include "FLAC/format.h"
-
-#include <xmmintrin.h> /* SSE */
-
-#if 1
-/* Faster on current Intel (starting from Core i aka Nehalem) and all AMD CPUs */
-
-FLAC__SSE_TARGET("sse")
-void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
-{ /* Accumulates sum over i of data[i]*data[i+k] for k = 0..3 and stores the four sums to autoc[0..3]. */
-	int i;
-	int limit = data_len - 4; /* last i for which the 4-wide load data[i..i+3] stays inside data[]; expected to be negative for data_len < 4 (clamped in the tail block) */
-	__m128 sum0; /* lane k accumulates the products for offset k */
-	/* lag appears only in the cast and assertions below; four results are always stored. */
-	(void) lag;
-	FLAC__ASSERT(lag <= 4);
-	FLAC__ASSERT(lag <= data_len);
-
-	sum0 = _mm_setzero_ps();
-
-	for(i = 0; i <= limit; i++) { /* positions where a full 4-sample window is available */
-		__m128 d, d0;
-		d0 = _mm_loadu_ps(data+i); /* data[i..i+3] */
-		d = d0; d = _mm_shuffle_ps(d, d, 0); /* broadcast data[i] to all lanes */
-		sum0 = _mm_add_ps(sum0, _mm_mul_ps(d0, d));
-	}
-
-	{ /* tail: samples not covered above, walked backwards; neighbours past the end count as zero */
-		__m128 d0 = _mm_setzero_ps();
-		limit++; if(limit < 0) limit = 0; /* first index the loop above did not handle */
-
-		for(i = data_len-1; i >= limit; i--) {
-			__m128 d;
-			d = _mm_load_ss(data+i); d = _mm_shuffle_ps(d, d, 0); /* broadcast data[i] */
-			d0 = _mm_shuffle_ps(d0, d0, _MM_SHUFFLE(2,1,0,3)); /* rotate lanes up by one */
-			d0 = _mm_move_ss(d0, d); /* lane 0 = data[i]; lanes 1..3 = data[i+1..i+3] or zero */
-			sum0 = _mm_add_ps(sum0, _mm_mul_ps(d, d0));
-		}
-	}
-
-	_mm_storeu_ps(autoc,   sum0);
-}
-
-FLAC__SSE_TARGET("sse")
-void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
-{ /* Accumulates sum over i of data[i]*data[i+k] for k = 0..7 and stores the eight sums to autoc[0..7]. */
-	int i;
-	int limit = data_len - 8; /* last i for which data[i..i+7] stays inside data[]; expected to be negative for data_len < 8 (clamped in the tail block) */
-	__m128 sum0, sum1; /* sum0: offsets 0..3, sum1: offsets 4..7 */
-	/* lag appears only in the cast and assertions below; eight results are always stored. */
-	(void) lag;
-	FLAC__ASSERT(lag <= 8);
-	FLAC__ASSERT(lag <= data_len);
-
-	sum0 = _mm_setzero_ps();
-	sum1 = _mm_setzero_ps();
-
-	for(i = 0; i <= limit; i++) { /* positions where a full 8-sample window is available */
-		__m128 d, d0, d1;
-		d0 = _mm_loadu_ps(data+i); /* data[i..i+3] */
-		d1 = _mm_loadu_ps(data+i+4); /* data[i+4..i+7] */
-		d = d0; d = _mm_shuffle_ps(d, d, 0); /* broadcast data[i] to all lanes */
-		sum0 = _mm_add_ps(sum0, _mm_mul_ps(d0, d));
-		sum1 = _mm_add_ps(sum1, _mm_mul_ps(d1, d));
-	}
-
-	{ /* tail: samples not covered above, walked backwards; neighbours past the end count as zero */
-		__m128 d0 = _mm_setzero_ps();
-		__m128 d1 = _mm_setzero_ps();
-		limit++; if(limit < 0) limit = 0; /* first index the loop above did not handle */
-
-		for(i = data_len-1; i >= limit; i--) {
-			__m128 d;
-			d = _mm_load_ss(data+i); d = _mm_shuffle_ps(d, d, 0); /* broadcast data[i] */
-			d1 = _mm_shuffle_ps(d1, d1, _MM_SHUFFLE(2,1,0,3)); /* rotate lanes up by one */
-			d0 = _mm_shuffle_ps(d0, d0, _MM_SHUFFLE(2,1,0,3));
-			d1 = _mm_move_ss(d1, d0); /* carry the value rotated out of d0's top lane into d1 lane 0 */
-			d0 = _mm_move_ss(d0, d); /* d0 lane 0 = data[i] */
-			sum1 = _mm_add_ps(sum1, _mm_mul_ps(d, d1));
-			sum0 = _mm_add_ps(sum0, _mm_mul_ps(d, d0));
-		}
-	}
-
-	_mm_storeu_ps(autoc,   sum0);
-	_mm_storeu_ps(autoc+4, sum1);
-}
-
-FLAC__SSE_TARGET("sse")
-void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
-{ /* Accumulates sum over i of data[i]*data[i+k] for k = 0..11 and stores the twelve sums to autoc[0..11]. */
-	int i;
-	int limit = data_len - 12; /* last i for which data[i..i+11] stays inside data[]; expected to be negative for data_len < 12 (clamped in the tail block) */
-	__m128 sum0, sum1, sum2; /* sum0: offsets 0..3, sum1: 4..7, sum2: 8..11 */
-	/* lag appears only in the cast and assertions below; twelve results are always stored. */
-	(void) lag;
-	FLAC__ASSERT(lag <= 12);
-	FLAC__ASSERT(lag <= data_len);
-
-	sum0 = _mm_setzero_ps();
-	sum1 = _mm_setzero_ps();
-	sum2 = _mm_setzero_ps();
-
-	for(i = 0; i <= limit; i++) { /* positions where a full 12-sample window is available */
-		__m128 d, d0, d1, d2;
-		d0 = _mm_loadu_ps(data+i); /* data[i..i+3] */
-		d1 = _mm_loadu_ps(data+i+4); /* data[i+4..i+7] */
-		d2 = _mm_loadu_ps(data+i+8); /* data[i+8..i+11] */
-		d = d0; d = _mm_shuffle_ps(d, d, 0); /* broadcast data[i] to all lanes */
-		sum0 = _mm_add_ps(sum0, _mm_mul_ps(d0, d));
-		sum1 = _mm_add_ps(sum1, _mm_mul_ps(d1, d));
-		sum2 = _mm_add_ps(sum2, _mm_mul_ps(d2, d));
-	}
-
-	{ /* tail: samples not covered above, walked backwards; neighbours past the end count as zero */
-		__m128 d0 = _mm_setzero_ps();
-		__m128 d1 = _mm_setzero_ps();
-		__m128 d2 = _mm_setzero_ps();
-		limit++; if(limit < 0) limit = 0; /* first index the loop above did not handle */
-
-		for(i = data_len-1; i >= limit; i--) {
-			__m128 d;
-			d = _mm_load_ss(data+i); d = _mm_shuffle_ps(d, d, 0); /* broadcast data[i] */
-			d2 = _mm_shuffle_ps(d2, d2, _MM_SHUFFLE(2,1,0,3)); /* rotate lanes up by one */
-			d1 = _mm_shuffle_ps(d1, d1, _MM_SHUFFLE(2,1,0,3));
-			d0 = _mm_shuffle_ps(d0, d0, _MM_SHUFFLE(2,1,0,3));
-			d2 = _mm_move_ss(d2, d1); /* carry the value rotated out of d1's top lane into d2 lane 0 */
-			d1 = _mm_move_ss(d1, d0); /* likewise from d0 into d1 */
-			d0 = _mm_move_ss(d0, d); /* d0 lane 0 = data[i] */
-			sum2 = _mm_add_ps(sum2, _mm_mul_ps(d, d2));
-			sum1 = _mm_add_ps(sum1, _mm_mul_ps(d, d1));
-			sum0 = _mm_add_ps(sum0, _mm_mul_ps(d, d0));
-		}
-	}
-
-	_mm_storeu_ps(autoc,   sum0);
-	_mm_storeu_ps(autoc+4, sum1);
-	_mm_storeu_ps(autoc+8, sum2);
-}
-
-FLAC__SSE_TARGET("sse")
-void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
-{ /* Accumulates sum over i of data[i]*data[i+k] for k = 0..15 and stores the sixteen sums to autoc[0..15]. */
-	int i;
-	int limit = data_len - 16; /* last i for which data[i..i+15] stays inside data[]; expected to be negative for data_len < 16 (clamped in the tail block) */
-	__m128 sum0, sum1, sum2, sum3; /* sumN holds offsets 4N..4N+3 */
-	/* lag appears only in the cast and assertions below; sixteen results are always stored. */
-	(void) lag;
-	FLAC__ASSERT(lag <= 16);
-	FLAC__ASSERT(lag <= data_len);
-
-	sum0 = _mm_setzero_ps();
-	sum1 = _mm_setzero_ps();
-	sum2 = _mm_setzero_ps();
-	sum3 = _mm_setzero_ps();
-
-	for(i = 0; i <= limit; i++) { /* positions where a full 16-sample window is available */
-		__m128 d, d0, d1, d2, d3;
-		d0 = _mm_loadu_ps(data+i); /* data[i..i+3] */
-		d1 = _mm_loadu_ps(data+i+4); /* data[i+4..i+7] */
-		d2 = _mm_loadu_ps(data+i+8); /* data[i+8..i+11] */
-		d3 = _mm_loadu_ps(data+i+12); /* data[i+12..i+15] */
-		d = d0; d = _mm_shuffle_ps(d, d, 0); /* broadcast data[i] to all lanes */
-		sum0 = _mm_add_ps(sum0, _mm_mul_ps(d0, d));
-		sum1 = _mm_add_ps(sum1, _mm_mul_ps(d1, d));
-		sum2 = _mm_add_ps(sum2, _mm_mul_ps(d2, d));
-		sum3 = _mm_add_ps(sum3, _mm_mul_ps(d3, d));
-	}
-
-	{ /* tail: samples not covered above, walked backwards; neighbours past the end count as zero */
-		__m128 d0 = _mm_setzero_ps();
-		__m128 d1 = _mm_setzero_ps();
-		__m128 d2 = _mm_setzero_ps();
-		__m128 d3 = _mm_setzero_ps();
-		limit++; if(limit < 0) limit = 0; /* first index the loop above did not handle */
-
-		for(i = data_len-1; i >= limit; i--) {
-			__m128 d;
-			d = _mm_load_ss(data+i); d = _mm_shuffle_ps(d, d, 0); /* broadcast data[i] */
-			d3 = _mm_shuffle_ps(d3, d3, _MM_SHUFFLE(2,1,0,3)); /* rotate lanes up by one */
-			d2 = _mm_shuffle_ps(d2, d2, _MM_SHUFFLE(2,1,0,3));
-			d1 = _mm_shuffle_ps(d1, d1, _MM_SHUFFLE(2,1,0,3));
-			d0 = _mm_shuffle_ps(d0, d0, _MM_SHUFFLE(2,1,0,3));
-			d3 = _mm_move_ss(d3, d2); /* carry the value rotated out of d2's top lane into d3 lane 0 */
-			d2 = _mm_move_ss(d2, d1); /* likewise from d1 into d2 */
-			d1 = _mm_move_ss(d1, d0); /* likewise from d0 into d1 */
-			d0 = _mm_move_ss(d0, d); /* d0 lane 0 = data[i] */
-			sum3 = _mm_add_ps(sum3, _mm_mul_ps(d, d3));
-			sum2 = _mm_add_ps(sum2, _mm_mul_ps(d, d2));
-			sum1 = _mm_add_ps(sum1, _mm_mul_ps(d, d1));
-			sum0 = _mm_add_ps(sum0, _mm_mul_ps(d, d0));
-		}
-	}
-
-	_mm_storeu_ps(autoc,   sum0);
-	_mm_storeu_ps(autoc+4, sum1);
-	_mm_storeu_ps(autoc+8, sum2);
-	_mm_storeu_ps(autoc+12,sum3);
-}
-
-#else
-/* Faster on older Intel CPUs (up to Core 2) */
-
-FLAC__SSE_TARGET("sse")
-void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
-{ /* Forward-walking variant (in the #else of "#if 1", so not compiled): accumulates data[i]*data[i-k], k = 0..3, into autoc[0..3]. */
-	__m128 xmm0, xmm2, xmm5; /* xmm0: current sample broadcast, xmm2: sample history, xmm5: running sums */
-
-	(void) lag;
-	FLAC__ASSERT(lag > 0);
-	FLAC__ASSERT(lag <= 4);
-	FLAC__ASSERT(lag <= data_len);
-	FLAC__ASSERT(data_len > 0);
-
-	xmm5 = _mm_setzero_ps();
-	/* First sample is handled outside the loop; data[0] is read unconditionally (data_len > 0 is only asserted). */
-	xmm0 = _mm_load_ss(data++);
-	xmm2 = xmm0; /* history = { data[0], 0, 0, 0 } */
-	xmm0 = _mm_shuffle_ps(xmm0, xmm0, 0); /* broadcast data[0] */
-
-	xmm0 = _mm_mul_ps(xmm0, xmm2);
-	xmm5 = _mm_add_ps(xmm5, xmm0);
-
-	data_len--;
-
-	while(data_len) /* one remaining sample per iteration */
-	{
-		xmm0 = _mm_load1_ps(data++); /* load the next sample into all four lanes */
-
-		xmm2 = _mm_shuffle_ps(xmm2, xmm2, _MM_SHUFFLE(2,1,0,3)); /* rotate history up by one lane */
-		xmm2 = _mm_move_ss(xmm2, xmm0); /* lane 0 = newest sample; lane k = sample k steps back (or zero) */
-		xmm0 = _mm_mul_ps(xmm0, xmm2);
-		xmm5 = _mm_add_ps(xmm5, xmm0);
-
-		data_len--;
-	}
-
-	_mm_storeu_ps(autoc, xmm5);
-}
-
-FLAC__SSE_TARGET("sse")
-void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
-{ /* Forward-walking variant (in the #else of "#if 1", so not compiled): accumulates data[i]*data[i-k], k = 0..7, into autoc[0..7]. */
-	__m128 xmm0, xmm1, xmm2, xmm3, xmm5, xmm6; /* xmm3:xmm2 = sample history, xmm6:xmm5 = running sums */
-
-	(void) lag;
-	FLAC__ASSERT(lag > 0);
-	FLAC__ASSERT(lag <= 8);
-	FLAC__ASSERT(lag <= data_len);
-	FLAC__ASSERT(data_len > 0);
-
-	xmm5 = _mm_setzero_ps();
-	xmm6 = _mm_setzero_ps();
-	/* First sample is handled outside the loop; data[0] is read unconditionally (data_len > 0 is only asserted). */
-	xmm0 = _mm_load_ss(data++);
-	xmm2 = xmm0; /* history low = { data[0], 0, 0, 0 } */
-	xmm0 = _mm_shuffle_ps(xmm0, xmm0, 0); /* broadcast data[0] */
-	xmm3 = _mm_setzero_ps(); /* history high starts empty */
-
-	xmm0 = _mm_mul_ps(xmm0, xmm2);
-	xmm5 = _mm_add_ps(xmm5, xmm0);
-
-	data_len--;
-
-	while(data_len) /* one remaining sample per iteration */
-	{
-		xmm0 = _mm_load1_ps(data++); /* load the next sample into all four lanes */
-
-		xmm2 = _mm_shuffle_ps(xmm2, xmm2, _MM_SHUFFLE(2,1,0,3)); /* rotate both history registers up by one lane */
-		xmm3 = _mm_shuffle_ps(xmm3, xmm3, _MM_SHUFFLE(2,1,0,3));
-		xmm3 = _mm_move_ss(xmm3, xmm2); /* carry the value rotated out of xmm2's top lane into xmm3 lane 0 */
-		xmm2 = _mm_move_ss(xmm2, xmm0); /* xmm2 lane 0 = newest sample */
-
-		xmm1 = xmm0;
-		xmm1 = _mm_mul_ps(xmm1, xmm3); /* products for offsets 4..7 */
-		xmm0 = _mm_mul_ps(xmm0, xmm2); /* products for offsets 0..3 */
-		xmm6 = _mm_add_ps(xmm6, xmm1);
-		xmm5 = _mm_add_ps(xmm5, xmm0);
-
-		data_len--;
-	}
-
-	_mm_storeu_ps(autoc,   xmm5);
-	_mm_storeu_ps(autoc+4, xmm6);
-}
-
-FLAC__SSE_TARGET("sse")
-void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
-{ /* Forward-walking variant (in the #else of "#if 1", so not compiled): accumulates data[i]*data[i-k], k = 0..11, into autoc[0..11]. */
-	__m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7; /* xmm4:xmm3:xmm2 = sample history, xmm7:xmm6:xmm5 = running sums */
-
-	(void) lag;
-	FLAC__ASSERT(lag > 0);
-	FLAC__ASSERT(lag <= 12);
-	FLAC__ASSERT(lag <= data_len);
-	FLAC__ASSERT(data_len > 0);
-
-	xmm5 = _mm_setzero_ps();
-	xmm6 = _mm_setzero_ps();
-	xmm7 = _mm_setzero_ps();
-	/* First sample is handled outside the loop; data[0] is read unconditionally (data_len > 0 is only asserted). */
-	xmm0 = _mm_load_ss(data++);
-	xmm2 = xmm0; /* history low = { data[0], 0, 0, 0 } */
-	xmm0 = _mm_shuffle_ps(xmm0, xmm0, 0); /* broadcast data[0] */
-	xmm3 = _mm_setzero_ps(); /* older history starts empty */
-	xmm4 = _mm_setzero_ps();
-
-	xmm0 = _mm_mul_ps(xmm0, xmm2);
-	xmm5 = _mm_add_ps(xmm5, xmm0);
-
-	data_len--;
-
-	while(data_len) /* one remaining sample per iteration */
-	{
-		xmm0 = _mm_load1_ps(data++); /* load the next sample into all four lanes */
-
-		xmm2 = _mm_shuffle_ps(xmm2, xmm2, _MM_SHUFFLE(2,1,0,3)); /* rotate all history registers up by one lane */
-		xmm3 = _mm_shuffle_ps(xmm3, xmm3, _MM_SHUFFLE(2,1,0,3));
-		xmm4 = _mm_shuffle_ps(xmm4, xmm4, _MM_SHUFFLE(2,1,0,3));
-		xmm4 = _mm_move_ss(xmm4, xmm3); /* carry the value rotated out of xmm3's top lane into xmm4 lane 0 */
-		xmm3 = _mm_move_ss(xmm3, xmm2); /* likewise from xmm2 into xmm3 */
-		xmm2 = _mm_move_ss(xmm2, xmm0); /* xmm2 lane 0 = newest sample */
-
-		xmm1 = xmm0;
-		xmm1 = _mm_mul_ps(xmm1, xmm2); /* products for offsets 0..3 */
-		xmm5 = _mm_add_ps(xmm5, xmm1);
-		xmm1 = xmm0;
-		xmm1 = _mm_mul_ps(xmm1, xmm3); /* products for offsets 4..7 */
-		xmm6 = _mm_add_ps(xmm6, xmm1);
-		xmm0 = _mm_mul_ps(xmm0, xmm4); /* products for offsets 8..11 */
-		xmm7 = _mm_add_ps(xmm7, xmm0);
-
-		data_len--;
-	}
-
-	_mm_storeu_ps(autoc,   xmm5);
-	_mm_storeu_ps(autoc+4, xmm6);
-	_mm_storeu_ps(autoc+8, xmm7);
-}
-
-FLAC__SSE_TARGET("sse")
-void FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[])
-{ /* Forward-walking variant (in the #else of "#if 1", so not compiled): accumulates data[i]*data[i-k], k = 0..15, into autoc[0..15]. */
-	__m128 xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8, xmm9; /* xmm5..xmm2 = sample history, xmm9..xmm6 = running sums */
-
-	(void) lag;
-	FLAC__ASSERT(lag > 0);
-	FLAC__ASSERT(lag <= 16);
-	FLAC__ASSERT(lag <= data_len);
-	FLAC__ASSERT(data_len > 0);
-
-	xmm6 = _mm_setzero_ps();
-	xmm7 = _mm_setzero_ps();
-	xmm8 = _mm_setzero_ps();
-	xmm9 = _mm_setzero_ps();
-	/* First sample is handled outside the loop; data[0] is read unconditionally (data_len > 0 is only asserted). */
-	xmm0 = _mm_load_ss(data++);
-	xmm2 = xmm0; /* history low = { data[0], 0, 0, 0 } */
-	xmm0 = _mm_shuffle_ps(xmm0, xmm0, 0); /* broadcast data[0] */
-	xmm3 = _mm_setzero_ps(); /* older history starts empty */
-	xmm4 = _mm_setzero_ps();
-	xmm5 = _mm_setzero_ps();
-
-	xmm0 = _mm_mul_ps(xmm0, xmm2);
-	xmm6 = _mm_add_ps(xmm6, xmm0);
-
-	data_len--;
-
-	while(data_len) /* one remaining sample per iteration */
-	{
-		xmm0 = _mm_load1_ps(data++); /* load the next sample into all four lanes */
-
-		/* shift xmm5:xmm4:xmm3:xmm2 left by one float */
-		xmm5 = _mm_shuffle_ps(xmm5, xmm5, _MM_SHUFFLE(2,1,0,3));
-		xmm4 = _mm_shuffle_ps(xmm4, xmm4, _MM_SHUFFLE(2,1,0,3));
-		xmm3 = _mm_shuffle_ps(xmm3, xmm3, _MM_SHUFFLE(2,1,0,3));
-		xmm2 = _mm_shuffle_ps(xmm2, xmm2, _MM_SHUFFLE(2,1,0,3));
-		xmm5 = _mm_move_ss(xmm5, xmm4); /* each move_ss carries the lane rotated out of the lower register */
-		xmm4 = _mm_move_ss(xmm4, xmm3);
-		xmm3 = _mm_move_ss(xmm3, xmm2);
-		xmm2 = _mm_move_ss(xmm2, xmm0); /* xmm2 lane 0 = newest sample */
-
-		/* xmm9|xmm8|xmm7|xmm6 += xmm0|xmm0|xmm0|xmm0 * xmm5|xmm4|xmm3|xmm2 */
-		xmm1 = xmm0;
-		xmm1 = _mm_mul_ps(xmm1, xmm5);
-		xmm9 = _mm_add_ps(xmm9, xmm1);
-		xmm1 = xmm0;
-		xmm1 = _mm_mul_ps(xmm1, xmm4);
-		xmm8 = _mm_add_ps(xmm8, xmm1);
-		xmm1 = xmm0;
-		xmm1 = _mm_mul_ps(xmm1, xmm3);
-		xmm7 = _mm_add_ps(xmm7, xmm1);
-		xmm0 = _mm_mul_ps(xmm0, xmm2);
-		xmm6 = _mm_add_ps(xmm6, xmm0);
-
-		data_len--;
-	}
-
-	_mm_storeu_ps(autoc,   xmm6);
-	_mm_storeu_ps(autoc+4, xmm7);
-	_mm_storeu_ps(autoc+8, xmm8);
-	_mm_storeu_ps(autoc+12,xmm9);
-}
-#endif
-
-#endif /* FLAC__SSE_SUPPORTED */
-#endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */
-#endif /* FLAC__NO_ASM */
-#endif /* FLAC__INTEGER_ONLY_LIBRARY */

diff --git a/src/libFLAC/lpc_intrin_sse2.c b/src/libFLAC/lpc_intrin_sse2.c
index e1908ed..d166d9b 100644
--- a/src/libFLAC/lpc_intrin_sse2.c
+++ b/src/libFLAC/lpc_intrin_sse2.c

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -34,9 +34,11 @@
 #  include <config.h>
 #endif
 
+#include "private/cpu.h"
+
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
 #ifndef FLAC__NO_ASM
-#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
+#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN
 #include "private/lpc.h"
 #ifdef FLAC__SSE2_SUPPORTED
 
@@ -45,18 +47,41 @@
 
 #include <emmintrin.h> /* SSE2 */
 
-#define RESIDUAL16_RESULT(xmmN) curr = *data++; *residual++ = curr - (_mm_cvtsi128_si32(xmmN) >> lp_quantization);
-#define     DATA16_RESULT(xmmN) curr = *residual++ + (_mm_cvtsi128_si32(xmmN) >> lp_quantization); *data++ = curr;
-
 #define RESIDUAL32_RESULT(xmmN) residual[i] = data[i] - (_mm_cvtsi128_si32(xmmN) >> lp_quantization);
 #define     DATA32_RESULT(xmmN) data[i] = residual[i] + (_mm_cvtsi128_si32(xmmN) >> lp_quantization);
 
+
 FLAC__SSE_TARGET("sse2")
-void FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
+void FLAC__lpc_compute_autocorrelation_intrin_sse2_lag_8(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[])
+{ /* The entire body is the shared source file textually included below, compiled here with MAX_LAG defined as 8. */
+#undef MAX_LAG
+#define MAX_LAG 8 /* NOTE(review): presumably the upper bound on lag assumed by the included code -- confirm in that file */
+#include "deduplication/lpc_compute_autocorrelation_intrin_sse2.c"
+}
+
+FLAC__SSE_TARGET("sse2")
+void FLAC__lpc_compute_autocorrelation_intrin_sse2_lag_10(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[])
+{ /* The entire body is the shared source file textually included below, compiled here with MAX_LAG defined as 10. */
+#undef MAX_LAG
+#define MAX_LAG 10 /* NOTE(review): presumably the upper bound on lag assumed by the included code -- confirm in that file */
+#include "deduplication/lpc_compute_autocorrelation_intrin_sse2.c"
+}
+
+
+FLAC__SSE_TARGET("sse2")
+void FLAC__lpc_compute_autocorrelation_intrin_sse2_lag_14(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[])
+{ /* The entire body is the shared source file textually included below, compiled here with MAX_LAG defined as 14. */
+#undef MAX_LAG
+#define MAX_LAG 14 /* NOTE(review): presumably the upper bound on lag assumed by the included code -- confirm in that file */
+#include "deduplication/lpc_compute_autocorrelation_intrin_sse2.c"
+}
+
+FLAC__SSE_TARGET("sse2")
+void FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[])
 {
 	int i;
 	FLAC__int32 sum;
-	__m128i cnt = _mm_cvtsi32_si128(lp_quantization);
+	const __m128i cnt = _mm_cvtsi32_si128(lp_quantization);
 
 	FLAC__ASSERT(order > 0);
 	FLAC__ASSERT(order <= 32);
@@ -81,20 +106,20 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m128i summ, mull;
-						summ = _mm_madd_epi16(q11, _mm_loadu_si128((const __m128i*)(data+i-12)));
-						mull = _mm_madd_epi16(q10, _mm_loadu_si128((const __m128i*)(data+i-11))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q9, _mm_loadu_si128((const __m128i*)(data+i-10))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q8, _mm_loadu_si128((const __m128i*)(data+i-9))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q7, _mm_loadu_si128((const __m128i*)(data+i-8))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q6, _mm_loadu_si128((const __m128i*)(data+i-7))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
+						summ = _mm_madd_epi16(q11, _mm_loadu_si128((const __m128i*)(const void*)(data+i-12)));
+						mull = _mm_madd_epi16(q10, _mm_loadu_si128((const __m128i*)(const void*)(data+i-11))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q9, _mm_loadu_si128((const __m128i*)(const void*)(data+i-10))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q8, _mm_loadu_si128((const __m128i*)(const void*)(data+i-9))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q7, _mm_loadu_si128((const __m128i*)(const void*)(data+i-8))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q6, _mm_loadu_si128((const __m128i*)(const void*)(data+i-7))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q5, _mm_loadu_si128((const __m128i*)(const void*)(data+i-6))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q4, _mm_loadu_si128((const __m128i*)(const void*)(data+i-5))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(const void*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(const void*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(const void*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(const void*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
 						summ = _mm_sra_epi32(summ, cnt);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), summ));
 					}
 				}
 				else { /* order == 11 */
@@ -113,19 +138,19 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m128i summ, mull;
-						summ = _mm_madd_epi16(q10, _mm_loadu_si128((const __m128i*)(data+i-11)));
-						mull = _mm_madd_epi16(q9, _mm_loadu_si128((const __m128i*)(data+i-10))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q8, _mm_loadu_si128((const __m128i*)(data+i-9))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q7, _mm_loadu_si128((const __m128i*)(data+i-8))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q6, _mm_loadu_si128((const __m128i*)(data+i-7))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
+						summ = _mm_madd_epi16(q10, _mm_loadu_si128((const __m128i*)(const void*)(data+i-11)));
+						mull = _mm_madd_epi16(q9, _mm_loadu_si128((const __m128i*)(const void*)(data+i-10))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q8, _mm_loadu_si128((const __m128i*)(const void*)(data+i-9))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q7, _mm_loadu_si128((const __m128i*)(const void*)(data+i-8))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q6, _mm_loadu_si128((const __m128i*)(const void*)(data+i-7))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q5, _mm_loadu_si128((const __m128i*)(const void*)(data+i-6))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q4, _mm_loadu_si128((const __m128i*)(const void*)(data+i-5))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(const void*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(const void*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(const void*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(const void*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
 						summ = _mm_sra_epi32(summ, cnt);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), summ));
 					}
 				}
 			}
@@ -145,18 +170,18 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m128i summ, mull;
-						summ = _mm_madd_epi16(q9, _mm_loadu_si128((const __m128i*)(data+i-10)));
-						mull = _mm_madd_epi16(q8, _mm_loadu_si128((const __m128i*)(data+i-9))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q7, _mm_loadu_si128((const __m128i*)(data+i-8))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q6, _mm_loadu_si128((const __m128i*)(data+i-7))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
+						summ = _mm_madd_epi16(q9, _mm_loadu_si128((const __m128i*)(const void*)(data+i-10)));
+						mull = _mm_madd_epi16(q8, _mm_loadu_si128((const __m128i*)(const void*)(data+i-9))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q7, _mm_loadu_si128((const __m128i*)(const void*)(data+i-8))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q6, _mm_loadu_si128((const __m128i*)(const void*)(data+i-7))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q5, _mm_loadu_si128((const __m128i*)(const void*)(data+i-6))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q4, _mm_loadu_si128((const __m128i*)(const void*)(data+i-5))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(const void*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(const void*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(const void*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(const void*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
 						summ = _mm_sra_epi32(summ, cnt);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), summ));
 					}
 				}
 				else { /* order == 9 */
@@ -173,17 +198,17 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m128i summ, mull;
-						summ = _mm_madd_epi16(q8, _mm_loadu_si128((const __m128i*)(data+i-9)));
-						mull = _mm_madd_epi16(q7, _mm_loadu_si128((const __m128i*)(data+i-8))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q6, _mm_loadu_si128((const __m128i*)(data+i-7))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
+						summ = _mm_madd_epi16(q8, _mm_loadu_si128((const __m128i*)(const void*)(data+i-9)));
+						mull = _mm_madd_epi16(q7, _mm_loadu_si128((const __m128i*)(const void*)(data+i-8))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q6, _mm_loadu_si128((const __m128i*)(const void*)(data+i-7))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q5, _mm_loadu_si128((const __m128i*)(const void*)(data+i-6))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q4, _mm_loadu_si128((const __m128i*)(const void*)(data+i-5))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(const void*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(const void*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(const void*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(const void*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
 						summ = _mm_sra_epi32(summ, cnt);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), summ));
 					}
 				}
 			}
@@ -203,16 +228,16 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m128i summ, mull;
-						summ = _mm_madd_epi16(q7, _mm_loadu_si128((const __m128i*)(data+i-8)));
-						mull = _mm_madd_epi16(q6, _mm_loadu_si128((const __m128i*)(data+i-7))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
+						summ = _mm_madd_epi16(q7, _mm_loadu_si128((const __m128i*)(const void*)(data+i-8)));
+						mull = _mm_madd_epi16(q6, _mm_loadu_si128((const __m128i*)(const void*)(data+i-7))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q5, _mm_loadu_si128((const __m128i*)(const void*)(data+i-6))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q4, _mm_loadu_si128((const __m128i*)(const void*)(data+i-5))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(const void*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(const void*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(const void*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(const void*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
 						summ = _mm_sra_epi32(summ, cnt);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), summ));
 					}
 				}
 				else { /* order == 7 */
@@ -227,15 +252,15 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m128i summ, mull;
-						summ = _mm_madd_epi16(q6, _mm_loadu_si128((const __m128i*)(data+i-7)));
-						mull = _mm_madd_epi16(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
+						summ = _mm_madd_epi16(q6, _mm_loadu_si128((const __m128i*)(const void*)(data+i-7)));
+						mull = _mm_madd_epi16(q5, _mm_loadu_si128((const __m128i*)(const void*)(data+i-6))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q4, _mm_loadu_si128((const __m128i*)(const void*)(data+i-5))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(const void*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(const void*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(const void*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(const void*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
 						summ = _mm_sra_epi32(summ, cnt);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), summ));
 					}
 				}
 			}
@@ -251,14 +276,14 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m128i summ, mull;
-						summ = _mm_madd_epi16(q5, _mm_loadu_si128((const __m128i*)(data+i-6)));
-						mull = _mm_madd_epi16(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
+						summ = _mm_madd_epi16(q5, _mm_loadu_si128((const __m128i*)(const void*)(data+i-6)));
+						mull = _mm_madd_epi16(q4, _mm_loadu_si128((const __m128i*)(const void*)(data+i-5))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(const void*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(const void*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(const void*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(const void*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
 						summ = _mm_sra_epi32(summ, cnt);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), summ));
 					}
 				}
 				else { /* order == 5 */
@@ -271,13 +296,13 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m128i summ, mull;
-						summ = _mm_madd_epi16(q4, _mm_loadu_si128((const __m128i*)(data+i-5)));
-						mull = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
+						summ = _mm_madd_epi16(q4, _mm_loadu_si128((const __m128i*)(const void*)(data+i-5)));
+						mull = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(const void*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(const void*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(const void*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(const void*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
 						summ = _mm_sra_epi32(summ, cnt);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), summ));
 					}
 				}
 			}
@@ -293,12 +318,12 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m128i summ, mull;
-						summ = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(data+i-4)));
-						mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
+						summ = _mm_madd_epi16(q3, _mm_loadu_si128((const __m128i*)(const void*)(data+i-4)));
+						mull = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(const void*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(const void*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(const void*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
 						summ = _mm_sra_epi32(summ, cnt);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), summ));
 					}
 				}
 				else { /* order == 3 */
@@ -309,11 +334,11 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m128i summ, mull;
-						summ = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(data+i-3)));
-						mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
+						summ = _mm_madd_epi16(q2, _mm_loadu_si128((const __m128i*)(const void*)(data+i-3)));
+						mull = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(const void*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(const void*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
 						summ = _mm_sra_epi32(summ, cnt);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), summ));
 					}
 				}
 			}
@@ -325,10 +350,10 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m128i summ, mull;
-						summ = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(data+i-2)));
-						mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
+						summ = _mm_madd_epi16(q1, _mm_loadu_si128((const __m128i*)(const void*)(data+i-2)));
+						mull = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(const void*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
 						summ = _mm_sra_epi32(summ, cnt);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), summ));
 					}
 				}
 				else { /* order == 1 */
@@ -337,9 +362,9 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m128i summ;
-						summ = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(data+i-1)));
+						summ = _mm_madd_epi16(q0, _mm_loadu_si128((const __m128i*)(const void*)(data+i-1)));
 						summ = _mm_sra_epi32(summ, cnt);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), summ));
 					}
 				}
 			}
@@ -347,17 +372,17 @@
 		for(; i < (int)data_len; i++) {
 			sum = 0;
 			switch(order) {
-				case 12: sum += qlp_coeff[11] * data[i-12];
-				case 11: sum += qlp_coeff[10] * data[i-11];
-				case 10: sum += qlp_coeff[ 9] * data[i-10];
-				case 9:  sum += qlp_coeff[ 8] * data[i- 9];
-				case 8:  sum += qlp_coeff[ 7] * data[i- 8];
-				case 7:  sum += qlp_coeff[ 6] * data[i- 7];
-				case 6:  sum += qlp_coeff[ 5] * data[i- 6];
-				case 5:  sum += qlp_coeff[ 4] * data[i- 5];
-				case 4:  sum += qlp_coeff[ 3] * data[i- 4];
-				case 3:  sum += qlp_coeff[ 2] * data[i- 3];
-				case 2:  sum += qlp_coeff[ 1] * data[i- 2];
+				case 12: sum += qlp_coeff[11] * data[i-12]; /* Falls through. */
+				case 11: sum += qlp_coeff[10] * data[i-11]; /* Falls through. */
+				case 10: sum += qlp_coeff[ 9] * data[i-10]; /* Falls through. */
+				case 9:  sum += qlp_coeff[ 8] * data[i- 9]; /* Falls through. */
+				case 8:  sum += qlp_coeff[ 7] * data[i- 8]; /* Falls through. */
+				case 7:  sum += qlp_coeff[ 6] * data[i- 7]; /* Falls through. */
+				case 6:  sum += qlp_coeff[ 5] * data[i- 6]; /* Falls through. */
+				case 5:  sum += qlp_coeff[ 4] * data[i- 5]; /* Falls through. */
+				case 4:  sum += qlp_coeff[ 3] * data[i- 4]; /* Falls through. */
+				case 3:  sum += qlp_coeff[ 2] * data[i- 3]; /* Falls through. */
+				case 2:  sum += qlp_coeff[ 1] * data[i- 2]; /* Falls through. */
 				case 1:  sum += qlp_coeff[ 0] * data[i- 1];
 			}
 			residual[i] = data[i] - (sum >> lp_quantization);
@@ -367,25 +392,25 @@
 		for(i = 0; i < (int)data_len; i++) {
 			sum = 0;
 			switch(order) {
-				case 32: sum += qlp_coeff[31] * data[i-32];
-				case 31: sum += qlp_coeff[30] * data[i-31];
-				case 30: sum += qlp_coeff[29] * data[i-30];
-				case 29: sum += qlp_coeff[28] * data[i-29];
-				case 28: sum += qlp_coeff[27] * data[i-28];
-				case 27: sum += qlp_coeff[26] * data[i-27];
-				case 26: sum += qlp_coeff[25] * data[i-26];
-				case 25: sum += qlp_coeff[24] * data[i-25];
-				case 24: sum += qlp_coeff[23] * data[i-24];
-				case 23: sum += qlp_coeff[22] * data[i-23];
-				case 22: sum += qlp_coeff[21] * data[i-22];
-				case 21: sum += qlp_coeff[20] * data[i-21];
-				case 20: sum += qlp_coeff[19] * data[i-20];
-				case 19: sum += qlp_coeff[18] * data[i-19];
-				case 18: sum += qlp_coeff[17] * data[i-18];
-				case 17: sum += qlp_coeff[16] * data[i-17];
-				case 16: sum += qlp_coeff[15] * data[i-16];
-				case 15: sum += qlp_coeff[14] * data[i-15];
-				case 14: sum += qlp_coeff[13] * data[i-14];
+				case 32: sum += qlp_coeff[31] * data[i-32]; /* Falls through. */
+				case 31: sum += qlp_coeff[30] * data[i-31]; /* Falls through. */
+				case 30: sum += qlp_coeff[29] * data[i-30]; /* Falls through. */
+				case 29: sum += qlp_coeff[28] * data[i-29]; /* Falls through. */
+				case 28: sum += qlp_coeff[27] * data[i-28]; /* Falls through. */
+				case 27: sum += qlp_coeff[26] * data[i-27]; /* Falls through. */
+				case 26: sum += qlp_coeff[25] * data[i-26]; /* Falls through. */
+				case 25: sum += qlp_coeff[24] * data[i-25]; /* Falls through. */
+				case 24: sum += qlp_coeff[23] * data[i-24]; /* Falls through. */
+				case 23: sum += qlp_coeff[22] * data[i-23]; /* Falls through. */
+				case 22: sum += qlp_coeff[21] * data[i-22]; /* Falls through. */
+				case 21: sum += qlp_coeff[20] * data[i-21]; /* Falls through. */
+				case 20: sum += qlp_coeff[19] * data[i-20]; /* Falls through. */
+				case 19: sum += qlp_coeff[18] * data[i-19]; /* Falls through. */
+				case 18: sum += qlp_coeff[17] * data[i-18]; /* Falls through. */
+				case 17: sum += qlp_coeff[16] * data[i-17]; /* Falls through. */
+				case 16: sum += qlp_coeff[15] * data[i-16]; /* Falls through. */
+				case 15: sum += qlp_coeff[14] * data[i-15]; /* Falls through. */
+				case 14: sum += qlp_coeff[13] * data[i-14]; /* Falls through. */
 				case 13: sum += qlp_coeff[12] * data[i-13];
 				         sum += qlp_coeff[11] * data[i-12];
 				         sum += qlp_coeff[10] * data[i-11];
@@ -406,7 +431,7 @@
 }
 
 FLAC__SSE_TARGET("sse2")
-void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
+void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[])
 {
 	int i;
 
@@ -418,12 +443,12 @@
 			if(order > 10) { /* order == 11, 12 */
 				if(order == 12) {
 					__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
-					xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));  // 0  0  q[1]  q[0]
-					xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));  // 0  0  q[3]  q[2]
-					xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4));  // 0  0  q[5]  q[4]
-					xmm3 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6));  // 0  0  q[7]  q[6]
-					xmm4 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+8));  // 0  0  q[9]  q[8]
-					xmm5 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+10)); // 0  0  q[11] q[10]
+					xmm0 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+0));  // 0  0  q[1]  q[0]
+					xmm1 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+2));  // 0  0  q[3]  q[2]
+					xmm2 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+4));  // 0  0  q[5]  q[4]
+					xmm3 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+6));  // 0  0  q[7]  q[6]
+					xmm4 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+8));  // 0  0  q[9]  q[8]
+					xmm5 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+10)); // 0  0  q[11] q[10]
 
 					xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0)); // 0  q[1]  0  q[0]
 					xmm1 = _mm_shuffle_epi32(xmm1, _MM_SHUFFLE(3,1,2,0)); // 0  q[3]  0  q[2]
@@ -436,41 +461,41 @@
 						//sum = 0;
 						//sum += qlp_coeff[11] * data[i-12];
 						//sum += qlp_coeff[10] * data[i-11];
-						xmm7 = _mm_loadl_epi64((const __m128i*)(data+i-12));  // 0   0        d[i-11]  d[i-12]
+						xmm7 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-12));  // 0   0        d[i-11]  d[i-12]
 						xmm7 = _mm_shuffle_epi32(xmm7, _MM_SHUFFLE(2,0,3,1)); // 0  d[i-12]   0        d[i-11]
 						xmm7 = _mm_mul_epu32(xmm7, xmm5); /* we use _unsigned_ multiplication and discard high dword of the result values */
 
 						//sum += qlp_coeff[9] * data[i-10];
 						//sum += qlp_coeff[8] * data[i-9];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-10));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-10));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epu32(xmm6, xmm4);
 						xmm7 = _mm_add_epi32(xmm7, xmm6);
 
 						//sum += qlp_coeff[7] * data[i-8];
 						//sum += qlp_coeff[6] * data[i-7];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-8));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-8));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epu32(xmm6, xmm3);
 						xmm7 = _mm_add_epi32(xmm7, xmm6);
 
 						//sum += qlp_coeff[5] * data[i-6];
 						//sum += qlp_coeff[4] * data[i-5];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-6));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-6));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epu32(xmm6, xmm2);
 						xmm7 = _mm_add_epi32(xmm7, xmm6);
 
 						//sum += qlp_coeff[3] * data[i-4];
 						//sum += qlp_coeff[2] * data[i-3];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-4));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epu32(xmm6, xmm1);
 						xmm7 = _mm_add_epi32(xmm7, xmm6);
 
 						//sum += qlp_coeff[1] * data[i-2];
 						//sum += qlp_coeff[0] * data[i-1];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-2));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epu32(xmm6, xmm0);
 						xmm7 = _mm_add_epi32(xmm7, xmm6);
@@ -481,11 +506,11 @@
 				}
 				else { /* order == 11 */
 					__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
-					xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
-					xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));
-					xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4));
-					xmm3 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6));
-					xmm4 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+8));
+					xmm0 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+0));
+					xmm1 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+2));
+					xmm2 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+4));
+					xmm3 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+6));
+					xmm4 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+8));
 					xmm5 = _mm_cvtsi32_si128(qlp_coeff[10]);
 
 					xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0));
@@ -502,35 +527,35 @@
 
 						//sum += qlp_coeff[9] * data[i-10];
 						//sum += qlp_coeff[8] * data[i-9];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-10));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-10));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epu32(xmm6, xmm4);
 						xmm7 = _mm_add_epi32(xmm7, xmm6);
 
 						//sum += qlp_coeff[7] * data[i-8];
 						//sum += qlp_coeff[6] * data[i-7];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-8));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-8));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epu32(xmm6, xmm3);
 						xmm7 = _mm_add_epi32(xmm7, xmm6);
 
 						//sum += qlp_coeff[5] * data[i-6];
 						//sum += qlp_coeff[4] * data[i-5];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-6));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-6));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epu32(xmm6, xmm2);
 						xmm7 = _mm_add_epi32(xmm7, xmm6);
 
 						//sum += qlp_coeff[3] * data[i-4];
 						//sum += qlp_coeff[2] * data[i-3];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-4));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epu32(xmm6, xmm1);
 						xmm7 = _mm_add_epi32(xmm7, xmm6);
 
 						//sum += qlp_coeff[1] * data[i-2];
 						//sum += qlp_coeff[0] * data[i-1];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-2));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epu32(xmm6, xmm0);
 						xmm7 = _mm_add_epi32(xmm7, xmm6);
@@ -543,11 +568,11 @@
 			else { /* order == 9, 10 */
 				if(order == 10) {
 					__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm6, xmm7;
-					xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
-					xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));
-					xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4));
-					xmm3 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6));
-					xmm4 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+8));
+					xmm0 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+0));
+					xmm1 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+2));
+					xmm2 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+4));
+					xmm3 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+6));
+					xmm4 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+8));
 
 					xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0));
 					xmm1 = _mm_shuffle_epi32(xmm1, _MM_SHUFFLE(3,1,2,0));
@@ -559,34 +584,34 @@
 						//sum = 0;
 						//sum += qlp_coeff[9] * data[i-10];
 						//sum += qlp_coeff[8] * data[i-9];
-						xmm7 = _mm_loadl_epi64((const __m128i*)(data+i-10));
+						xmm7 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-10));
 						xmm7 = _mm_shuffle_epi32(xmm7, _MM_SHUFFLE(2,0,3,1));
 						xmm7 = _mm_mul_epu32(xmm7, xmm4);
 
 						//sum += qlp_coeff[7] * data[i-8];
 						//sum += qlp_coeff[6] * data[i-7];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-8));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-8));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epu32(xmm6, xmm3);
 						xmm7 = _mm_add_epi32(xmm7, xmm6);
 
 						//sum += qlp_coeff[5] * data[i-6];
 						//sum += qlp_coeff[4] * data[i-5];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-6));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-6));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epu32(xmm6, xmm2);
 						xmm7 = _mm_add_epi32(xmm7, xmm6);
 
 						//sum += qlp_coeff[3] * data[i-4];
 						//sum += qlp_coeff[2] * data[i-3];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-4));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epu32(xmm6, xmm1);
 						xmm7 = _mm_add_epi32(xmm7, xmm6);
 
 						//sum += qlp_coeff[1] * data[i-2];
 						//sum += qlp_coeff[0] * data[i-1];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-2));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epu32(xmm6, xmm0);
 						xmm7 = _mm_add_epi32(xmm7, xmm6);
@@ -597,10 +622,10 @@
 				}
 				else { /* order == 9 */
 					__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm6, xmm7;
-					xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
-					xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));
-					xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4));
-					xmm3 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6));
+					xmm0 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+0));
+					xmm1 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+2));
+					xmm2 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+4));
+					xmm3 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+6));
 					xmm4 = _mm_cvtsi32_si128(qlp_coeff[8]);
 
 					xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0));
@@ -616,28 +641,28 @@
 
 						//sum += qlp_coeff[7] * data[i-8];
 						//sum += qlp_coeff[6] * data[i-7];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-8));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-8));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epu32(xmm6, xmm3);
 						xmm7 = _mm_add_epi32(xmm7, xmm6);
 
 						//sum += qlp_coeff[5] * data[i-6];
 						//sum += qlp_coeff[4] * data[i-5];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-6));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-6));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epu32(xmm6, xmm2);
 						xmm7 = _mm_add_epi32(xmm7, xmm6);
 
 						//sum += qlp_coeff[3] * data[i-4];
 						//sum += qlp_coeff[2] * data[i-3];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-4));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epu32(xmm6, xmm1);
 						xmm7 = _mm_add_epi32(xmm7, xmm6);
 
 						//sum += qlp_coeff[1] * data[i-2];
 						//sum += qlp_coeff[0] * data[i-1];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-2));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epu32(xmm6, xmm0);
 						xmm7 = _mm_add_epi32(xmm7, xmm6);
@@ -652,10 +677,10 @@
 			if(order > 6) { /* order == 7, 8 */
 				if(order == 8) {
 					__m128i xmm0, xmm1, xmm2, xmm3, xmm6, xmm7;
-					xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
-					xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));
-					xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4));
-					xmm3 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6));
+					xmm0 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+0));
+					xmm1 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+2));
+					xmm2 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+4));
+					xmm3 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+6));
 
 					xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0));
 					xmm1 = _mm_shuffle_epi32(xmm1, _MM_SHUFFLE(3,1,2,0));
@@ -666,27 +691,27 @@
 						//sum = 0;
 						//sum += qlp_coeff[7] * data[i-8];
 						//sum += qlp_coeff[6] * data[i-7];
-						xmm7 = _mm_loadl_epi64((const __m128i*)(data+i-8));
+						xmm7 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-8));
 						xmm7 = _mm_shuffle_epi32(xmm7, _MM_SHUFFLE(2,0,3,1));
 						xmm7 = _mm_mul_epu32(xmm7, xmm3);
 
 						//sum += qlp_coeff[5] * data[i-6];
 						//sum += qlp_coeff[4] * data[i-5];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-6));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-6));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epu32(xmm6, xmm2);
 						xmm7 = _mm_add_epi32(xmm7, xmm6);
 
 						//sum += qlp_coeff[3] * data[i-4];
 						//sum += qlp_coeff[2] * data[i-3];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-4));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epu32(xmm6, xmm1);
 						xmm7 = _mm_add_epi32(xmm7, xmm6);
 
 						//sum += qlp_coeff[1] * data[i-2];
 						//sum += qlp_coeff[0] * data[i-1];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-2));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epu32(xmm6, xmm0);
 						xmm7 = _mm_add_epi32(xmm7, xmm6);
@@ -697,9 +722,9 @@
 				}
 				else { /* order == 7 */
 					__m128i xmm0, xmm1, xmm2, xmm3, xmm6, xmm7;
-					xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
-					xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));
-					xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4));
+					xmm0 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+0));
+					xmm1 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+2));
+					xmm2 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+4));
 					xmm3 = _mm_cvtsi32_si128(qlp_coeff[6]);
 
 					xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0));
@@ -714,21 +739,21 @@
 
 						//sum += qlp_coeff[5] * data[i-6];
 						//sum += qlp_coeff[4] * data[i-5];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-6));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-6));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epu32(xmm6, xmm2);
 						xmm7 = _mm_add_epi32(xmm7, xmm6);
 
 						//sum += qlp_coeff[3] * data[i-4];
 						//sum += qlp_coeff[2] * data[i-3];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-4));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epu32(xmm6, xmm1);
 						xmm7 = _mm_add_epi32(xmm7, xmm6);
 
 						//sum += qlp_coeff[1] * data[i-2];
 						//sum += qlp_coeff[0] * data[i-1];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-2));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epu32(xmm6, xmm0);
 						xmm7 = _mm_add_epi32(xmm7, xmm6);
@@ -741,9 +766,9 @@
 			else { /* order == 5, 6 */
 				if(order == 6) {
 					__m128i xmm0, xmm1, xmm2, xmm6, xmm7;
-					xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
-					xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));
-					xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4));
+					xmm0 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+0));
+					xmm1 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+2));
+					xmm2 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+4));
 
 					xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0));
 					xmm1 = _mm_shuffle_epi32(xmm1, _MM_SHUFFLE(3,1,2,0));
@@ -753,20 +778,20 @@
 						//sum = 0;
 						//sum += qlp_coeff[5] * data[i-6];
 						//sum += qlp_coeff[4] * data[i-5];
-						xmm7 = _mm_loadl_epi64((const __m128i*)(data+i-6));
+						xmm7 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-6));
 						xmm7 = _mm_shuffle_epi32(xmm7, _MM_SHUFFLE(2,0,3,1));
 						xmm7 = _mm_mul_epu32(xmm7, xmm2);
 
 						//sum += qlp_coeff[3] * data[i-4];
 						//sum += qlp_coeff[2] * data[i-3];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-4));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epu32(xmm6, xmm1);
 						xmm7 = _mm_add_epi32(xmm7, xmm6);
 
 						//sum += qlp_coeff[1] * data[i-2];
 						//sum += qlp_coeff[0] * data[i-1];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-2));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epu32(xmm6, xmm0);
 						xmm7 = _mm_add_epi32(xmm7, xmm6);
@@ -777,8 +802,8 @@
 				}
 				else { /* order == 5 */
 					__m128i xmm0, xmm1, xmm2, xmm6, xmm7;
-					xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
-					xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));
+					xmm0 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+0));
+					xmm1 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+2));
 					xmm2 = _mm_cvtsi32_si128(qlp_coeff[4]);
 
 					xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0));
@@ -792,14 +817,14 @@
 
 						//sum += qlp_coeff[3] * data[i-4];
 						//sum += qlp_coeff[2] * data[i-3];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-4));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epu32(xmm6, xmm1);
 						xmm7 = _mm_add_epi32(xmm7, xmm6);
 
 						//sum += qlp_coeff[1] * data[i-2];
 						//sum += qlp_coeff[0] * data[i-1];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-2));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epu32(xmm6, xmm0);
 						xmm7 = _mm_add_epi32(xmm7, xmm6);
@@ -814,8 +839,8 @@
 			if(order > 2) { /* order == 3, 4 */
 				if(order == 4) {
 					__m128i xmm0, xmm1, xmm6, xmm7;
-					xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
-					xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));
+					xmm0 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+0));
+					xmm1 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+2));
 
 					xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0));
 					xmm1 = _mm_shuffle_epi32(xmm1, _MM_SHUFFLE(3,1,2,0));
@@ -824,13 +849,13 @@
 						//sum = 0;
 						//sum += qlp_coeff[3] * data[i-4];
 						//sum += qlp_coeff[2] * data[i-3];
-						xmm7 = _mm_loadl_epi64((const __m128i*)(data+i-4));
+						xmm7 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-4));
 						xmm7 = _mm_shuffle_epi32(xmm7, _MM_SHUFFLE(2,0,3,1));
 						xmm7 = _mm_mul_epu32(xmm7, xmm1);
 
 						//sum += qlp_coeff[1] * data[i-2];
 						//sum += qlp_coeff[0] * data[i-1];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-2));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epu32(xmm6, xmm0);
 						xmm7 = _mm_add_epi32(xmm7, xmm6);
@@ -841,7 +866,7 @@
 				}
 				else { /* order == 3 */
 					__m128i xmm0, xmm1, xmm6, xmm7;
-					xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
+					xmm0 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+0));
 					xmm1 = _mm_cvtsi32_si128(qlp_coeff[2]);
 
 					xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0));
@@ -854,7 +879,7 @@
 
 						//sum += qlp_coeff[1] * data[i-2];
 						//sum += qlp_coeff[0] * data[i-1];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-2));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epu32(xmm6, xmm0);
 						xmm7 = _mm_add_epi32(xmm7, xmm6);
@@ -867,14 +892,14 @@
 			else { /* order == 1, 2 */
 				if(order == 2) {
 					__m128i xmm0, xmm7;
-					xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
+					xmm0 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+0));
 					xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0));
 
 					for(i = 0; i < (int)data_len; i++) {
 						//sum = 0;
 						//sum += qlp_coeff[1] * data[i-2];
 						//sum += qlp_coeff[0] * data[i-1];
-						xmm7 = _mm_loadl_epi64((const __m128i*)(data+i-2));
+						xmm7 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-2));
 						xmm7 = _mm_shuffle_epi32(xmm7, _MM_SHUFFLE(2,0,3,1));
 						xmm7 = _mm_mul_epu32(xmm7, xmm0);
 
@@ -894,25 +919,25 @@
 		for(i = 0; i < (int)data_len; i++) {
 			sum = 0;
 			switch(order) {
-				case 32: sum += qlp_coeff[31] * data[i-32];
-				case 31: sum += qlp_coeff[30] * data[i-31];
-				case 30: sum += qlp_coeff[29] * data[i-30];
-				case 29: sum += qlp_coeff[28] * data[i-29];
-				case 28: sum += qlp_coeff[27] * data[i-28];
-				case 27: sum += qlp_coeff[26] * data[i-27];
-				case 26: sum += qlp_coeff[25] * data[i-26];
-				case 25: sum += qlp_coeff[24] * data[i-25];
-				case 24: sum += qlp_coeff[23] * data[i-24];
-				case 23: sum += qlp_coeff[22] * data[i-23];
-				case 22: sum += qlp_coeff[21] * data[i-22];
-				case 21: sum += qlp_coeff[20] * data[i-21];
-				case 20: sum += qlp_coeff[19] * data[i-20];
-				case 19: sum += qlp_coeff[18] * data[i-19];
-				case 18: sum += qlp_coeff[17] * data[i-18];
-				case 17: sum += qlp_coeff[16] * data[i-17];
-				case 16: sum += qlp_coeff[15] * data[i-16];
-				case 15: sum += qlp_coeff[14] * data[i-15];
-				case 14: sum += qlp_coeff[13] * data[i-14];
+				case 32: sum += qlp_coeff[31] * data[i-32]; /* Falls through. */
+				case 31: sum += qlp_coeff[30] * data[i-31]; /* Falls through. */
+				case 30: sum += qlp_coeff[29] * data[i-30]; /* Falls through. */
+				case 29: sum += qlp_coeff[28] * data[i-29]; /* Falls through. */
+				case 28: sum += qlp_coeff[27] * data[i-28]; /* Falls through. */
+				case 27: sum += qlp_coeff[26] * data[i-27]; /* Falls through. */
+				case 26: sum += qlp_coeff[25] * data[i-26]; /* Falls through. */
+				case 25: sum += qlp_coeff[24] * data[i-25]; /* Falls through. */
+				case 24: sum += qlp_coeff[23] * data[i-24]; /* Falls through. */
+				case 23: sum += qlp_coeff[22] * data[i-23]; /* Falls through. */
+				case 22: sum += qlp_coeff[21] * data[i-22]; /* Falls through. */
+				case 21: sum += qlp_coeff[20] * data[i-21]; /* Falls through. */
+				case 20: sum += qlp_coeff[19] * data[i-20]; /* Falls through. */
+				case 19: sum += qlp_coeff[18] * data[i-19]; /* Falls through. */
+				case 18: sum += qlp_coeff[17] * data[i-18]; /* Falls through. */
+				case 17: sum += qlp_coeff[16] * data[i-17]; /* Falls through. */
+				case 16: sum += qlp_coeff[15] * data[i-16]; /* Falls through. */
+				case 15: sum += qlp_coeff[14] * data[i-15]; /* Falls through. */
+				case 14: sum += qlp_coeff[13] * data[i-14]; /* Falls through. */
 				case 13: sum += qlp_coeff[12] * data[i-13];
 				         sum += qlp_coeff[11] * data[i-12];
 				         sum += qlp_coeff[10] * data[i-11];
@@ -932,156 +957,6 @@
 	}
 }
 
-#if defined FLAC__CPU_IA32 && !defined FLAC__HAS_NASM /* unused for x64; not better than MMX asm */
-
-FLAC__SSE_TARGET("sse2")
-void FLAC__lpc_restore_signal_16_intrin_sse2(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[])
-{
-	if (order < 8 || order > 12) {
-		FLAC__lpc_restore_signal(residual, data_len, qlp_coeff, order, lp_quantization, data);
-		return;
-	}
-	if (data_len == 0)
-		return;
-
-	FLAC__ASSERT(order >= 8);
-	FLAC__ASSERT(order <= 12);
-
-	if(order > 8) { /* order == 9, 10, 11, 12 */
-		FLAC__int32 curr;
-		__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
-		xmm0 = _mm_loadu_si128((const __m128i*)(qlp_coeff+0));
-		xmm6 = _mm_loadu_si128((const __m128i*)(qlp_coeff+4));
-		xmm1 = _mm_loadu_si128((const __m128i*)(qlp_coeff+8)); /* read 0 to 3 uninitialized coeffs... */
-		switch(order)                                          /* ...and zero them out */
-		{
-		case 9:
-			xmm1 = _mm_slli_si128(xmm1, 12); xmm1 = _mm_srli_si128(xmm1, 12); break;
-		case 10:
-			xmm1 = _mm_slli_si128(xmm1, 8); xmm1 = _mm_srli_si128(xmm1, 8); break;
-		case 11:
-			xmm1 = _mm_slli_si128(xmm1, 4); xmm1 = _mm_srli_si128(xmm1, 4); break;
-		}
-		xmm2 = _mm_setzero_si128();
-		xmm0 = _mm_packs_epi32(xmm0, xmm6);
-		xmm1 = _mm_packs_epi32(xmm1, xmm2);
-
-		xmm4 = _mm_loadu_si128((const __m128i*)(data-12));
-		xmm5 = _mm_loadu_si128((const __m128i*)(data-8));
-		xmm3 = _mm_loadu_si128((const __m128i*)(data-4));
-		xmm4 = _mm_shuffle_epi32(xmm4, _MM_SHUFFLE(0,1,2,3));
-		xmm5 = _mm_shuffle_epi32(xmm5, _MM_SHUFFLE(0,1,2,3));
-		xmm3 = _mm_shuffle_epi32(xmm3, _MM_SHUFFLE(0,1,2,3));
-		xmm4 = _mm_packs_epi32(xmm4, xmm2);
-		xmm3 = _mm_packs_epi32(xmm3, xmm5);
-
-		xmm7 = _mm_slli_si128(xmm1, 2);
-		xmm7 = _mm_or_si128(xmm7, _mm_srli_si128(xmm0, 14));
-		xmm2 = _mm_slli_si128(xmm0, 2);
-
-		/* xmm0, xmm1: qlp_coeff
-			xmm2, xmm7: qlp_coeff << 16 bit
-			xmm3, xmm4: data */
-
-		xmm5 = _mm_madd_epi16(xmm4, xmm1);
-		xmm6 = _mm_madd_epi16(xmm3, xmm0);
-		xmm6 = _mm_add_epi32(xmm6, xmm5);
-		xmm6 = _mm_add_epi32(xmm6, _mm_srli_si128(xmm6, 8));
-		xmm6 = _mm_add_epi32(xmm6, _mm_srli_si128(xmm6, 4));
-
-		DATA16_RESULT(xmm6);
-
-		data_len--;
-
-		if(data_len % 2) {
-			xmm6 = _mm_srli_si128(xmm3, 14);
-			xmm4 = _mm_slli_si128(xmm4, 2);
-			xmm3 = _mm_slli_si128(xmm3, 2);
-			xmm4 = _mm_or_si128(xmm4, xmm6);
-			xmm3 = _mm_insert_epi16(xmm3, curr, 0);
-
-			xmm5 = _mm_madd_epi16(xmm4, xmm1);
-			xmm6 = _mm_madd_epi16(xmm3, xmm0);
-			xmm6 = _mm_add_epi32(xmm6, xmm5);
-			xmm6 = _mm_add_epi32(xmm6, _mm_srli_si128(xmm6, 8));
-			xmm6 = _mm_add_epi32(xmm6, _mm_srli_si128(xmm6, 4));
-
-			DATA16_RESULT(xmm6);
-
-			data_len--;
-		}
-
-		while(data_len) { /* data_len is a multiple of 2 */
-			/* 1 _mm_slli_si128 per data element less but we need shifted qlp_coeff in xmm2:xmm7 */
-			xmm6 = _mm_srli_si128(xmm3, 12);
-			xmm4 = _mm_slli_si128(xmm4, 4);
-			xmm3 = _mm_slli_si128(xmm3, 4);
-			xmm4 = _mm_or_si128(xmm4, xmm6);
-			xmm3 = _mm_insert_epi16(xmm3, curr, 1);
-
-			xmm5 = _mm_madd_epi16(xmm4, xmm7);
-			xmm6 = _mm_madd_epi16(xmm3, xmm2);
-			xmm6 = _mm_add_epi32(xmm6, xmm5);
-			xmm6 = _mm_add_epi32(xmm6, _mm_srli_si128(xmm6, 8));
-			xmm6 = _mm_add_epi32(xmm6, _mm_srli_si128(xmm6, 4));
-
-			DATA16_RESULT(xmm6);
-
-			xmm3 = _mm_insert_epi16(xmm3, curr, 0);
-
-			xmm5 = _mm_madd_epi16(xmm4, xmm1);
-			xmm6 = _mm_madd_epi16(xmm3, xmm0);
-			xmm6 = _mm_add_epi32(xmm6, xmm5);
-			xmm6 = _mm_add_epi32(xmm6, _mm_srli_si128(xmm6, 8));
-			xmm6 = _mm_add_epi32(xmm6, _mm_srli_si128(xmm6, 4));
-
-			DATA16_RESULT(xmm6);
-
-			data_len-=2;
-		}
-	} /* endif(order > 8) */
-	else
-	{
-		FLAC__int32 curr;
-		__m128i xmm0, xmm1, xmm3, xmm6;
-		xmm0 = _mm_loadu_si128((const __m128i*)(qlp_coeff+0));
-		xmm1 = _mm_loadu_si128((const __m128i*)(qlp_coeff+4));
-		xmm0 = _mm_packs_epi32(xmm0, xmm1);
-
-		xmm1 = _mm_loadu_si128((const __m128i*)(data-8));
-		xmm3 = _mm_loadu_si128((const __m128i*)(data-4));
-		xmm1 = _mm_shuffle_epi32(xmm1, _MM_SHUFFLE(0,1,2,3));
-		xmm3 = _mm_shuffle_epi32(xmm3, _MM_SHUFFLE(0,1,2,3));
-		xmm3 = _mm_packs_epi32(xmm3, xmm1);
-
-		/* xmm0: qlp_coeff
-			xmm3: data */
-
-		xmm6 = _mm_madd_epi16(xmm3, xmm0);
-		xmm6 = _mm_add_epi32(xmm6, _mm_srli_si128(xmm6, 8));
-		xmm6 = _mm_add_epi32(xmm6, _mm_srli_si128(xmm6, 4));
-
-		DATA16_RESULT(xmm6);
-
-		data_len--;
-
-		while(data_len) {
-			xmm3 = _mm_slli_si128(xmm3, 2);
-			xmm3 = _mm_insert_epi16(xmm3, curr, 0);
-
-			xmm6 = _mm_madd_epi16(xmm3, xmm0);
-			xmm6 = _mm_add_epi32(xmm6, _mm_srli_si128(xmm6, 8));
-			xmm6 = _mm_add_epi32(xmm6, _mm_srli_si128(xmm6, 4));
-
-			DATA16_RESULT(xmm6);
-
-			data_len--;
-		}
-	}
-}
-
-#endif /* defined FLAC__CPU_IA32 && !defined FLAC__HAS_NASM */
-
 #endif /* FLAC__SSE2_SUPPORTED */
 #endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */
 #endif /* FLAC__NO_ASM */

diff --git a/src/libFLAC/lpc_intrin_sse41.c b/src/libFLAC/lpc_intrin_sse41.c
index b6f4e5e..136f71c 100644
--- a/src/libFLAC/lpc_intrin_sse41.c
+++ b/src/libFLAC/lpc_intrin_sse41.c

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -34,9 +34,11 @@
 #  include <config.h>
 #endif
 
+#include "private/cpu.h"
+
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
 #ifndef FLAC__NO_ASM
-#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
+#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN
 #include "private/lpc.h"
 #ifdef FLAC__SSE4_1_SUPPORTED
 
@@ -51,10 +53,10 @@
 #define RESIDUAL64_RESULT1(xmmN) residual[i] = data[i] - _mm_cvtsi128_si32(_mm_srli_epi64(xmmN, lp_quantization))
 
 FLAC__SSE_TARGET("sse4.1")
-void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
+void FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[])
 {
 	int i;
-	__m128i cnt = _mm_cvtsi32_si128(lp_quantization);
+	const __m128i cnt = _mm_cvtsi32_si128(lp_quantization);
 
 	FLAC__ASSERT(order > 0);
 	FLAC__ASSERT(order <= 32);
@@ -65,12 +67,12 @@
 			if(order > 10) { /* order == 11, 12 */
 				if(order == 12) {
 					__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
-					xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));  // 0  0  q[1]  q[0]
-					xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));  // 0  0  q[3]  q[2]
-					xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4));  // 0  0  q[5]  q[4]
-					xmm3 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6));  // 0  0  q[7]  q[6]
-					xmm4 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+8));  // 0  0  q[9]  q[8]
-					xmm5 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+10)); // 0  0  q[11] q[10]
+					xmm0 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+0));  // 0  0  q[1]  q[0]
+					xmm1 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+2));  // 0  0  q[3]  q[2]
+					xmm2 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+4));  // 0  0  q[5]  q[4]
+					xmm3 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+6));  // 0  0  q[7]  q[6]
+					xmm4 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+8));  // 0  0  q[9]  q[8]
+					xmm5 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+10)); // 0  0  q[11] q[10]
 
 					xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0)); // 0  q[1]  0  q[0]
 					xmm1 = _mm_shuffle_epi32(xmm1, _MM_SHUFFLE(3,1,2,0)); // 0  q[3]  0  q[2]
@@ -83,41 +85,41 @@
 						//sum = 0;
 						//sum += qlp_coeff[11] * (FLAC__int64)data[i-12];
 						//sum += qlp_coeff[10] * (FLAC__int64)data[i-11];
-						xmm7 = _mm_loadl_epi64((const __m128i*)(data+i-12));  // 0   0        d[i-11]  d[i-12]
+						xmm7 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-12));  // 0   0        d[i-11]  d[i-12]
 						xmm7 = _mm_shuffle_epi32(xmm7, _MM_SHUFFLE(2,0,3,1)); // 0  d[i-12]   0        d[i-11]
 						xmm7 = _mm_mul_epi32(xmm7, xmm5);
 
 						//sum += qlp_coeff[9] * (FLAC__int64)data[i-10];
 						//sum += qlp_coeff[8] * (FLAC__int64)data[i-9];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-10));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-10));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epi32(xmm6, xmm4);
 						xmm7 = _mm_add_epi64(xmm7, xmm6);
 
 						//sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
 						//sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-8));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-8));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epi32(xmm6, xmm3);
 						xmm7 = _mm_add_epi64(xmm7, xmm6);
 
 						//sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
 						//sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-6));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-6));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epi32(xmm6, xmm2);
 						xmm7 = _mm_add_epi64(xmm7, xmm6);
 
 						//sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
 						//sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-4));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epi32(xmm6, xmm1);
 						xmm7 = _mm_add_epi64(xmm7, xmm6);
 
 						//sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						//sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-2));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epi32(xmm6, xmm0);
 						xmm7 = _mm_add_epi64(xmm7, xmm6);
@@ -128,11 +130,11 @@
 				}
 				else { /* order == 11 */
 					__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
-					xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
-					xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));
-					xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4));
-					xmm3 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6));
-					xmm4 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+8));
+					xmm0 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+0));
+					xmm1 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+2));
+					xmm2 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+4));
+					xmm3 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+6));
+					xmm4 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+8));
 					xmm5 = _mm_cvtsi32_si128(qlp_coeff[10]);
 
 					xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0));
@@ -149,35 +151,35 @@
 
 						//sum += qlp_coeff[9] * (FLAC__int64)data[i-10];
 						//sum += qlp_coeff[8] * (FLAC__int64)data[i-9];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-10));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-10));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epi32(xmm6, xmm4);
 						xmm7 = _mm_add_epi64(xmm7, xmm6);
 
 						//sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
 						//sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-8));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-8));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epi32(xmm6, xmm3);
 						xmm7 = _mm_add_epi64(xmm7, xmm6);
 
 						//sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
 						//sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-6));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-6));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epi32(xmm6, xmm2);
 						xmm7 = _mm_add_epi64(xmm7, xmm6);
 
 						//sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
 						//sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-4));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epi32(xmm6, xmm1);
 						xmm7 = _mm_add_epi64(xmm7, xmm6);
 
 						//sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						//sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-2));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epi32(xmm6, xmm0);
 						xmm7 = _mm_add_epi64(xmm7, xmm6);
@@ -190,11 +192,11 @@
 			else { /* order == 9, 10 */
 				if(order == 10) {
 					__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm6, xmm7;
-					xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
-					xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));
-					xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4));
-					xmm3 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6));
-					xmm4 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+8));
+					xmm0 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+0));
+					xmm1 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+2));
+					xmm2 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+4));
+					xmm3 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+6));
+					xmm4 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+8));
 
 					xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0));
 					xmm1 = _mm_shuffle_epi32(xmm1, _MM_SHUFFLE(3,1,2,0));
@@ -206,34 +208,34 @@
 						//sum = 0;
 						//sum += qlp_coeff[9] * (FLAC__int64)data[i-10];
 						//sum += qlp_coeff[8] * (FLAC__int64)data[i-9];
-						xmm7 = _mm_loadl_epi64((const __m128i*)(data+i-10));
+						xmm7 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-10));
 						xmm7 = _mm_shuffle_epi32(xmm7, _MM_SHUFFLE(2,0,3,1));
 						xmm7 = _mm_mul_epi32(xmm7, xmm4);
 
 						//sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
 						//sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-8));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-8));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epi32(xmm6, xmm3);
 						xmm7 = _mm_add_epi64(xmm7, xmm6);
 
 						//sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
 						//sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-6));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-6));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epi32(xmm6, xmm2);
 						xmm7 = _mm_add_epi64(xmm7, xmm6);
 
 						//sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
 						//sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-4));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epi32(xmm6, xmm1);
 						xmm7 = _mm_add_epi64(xmm7, xmm6);
 
 						//sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						//sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-2));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epi32(xmm6, xmm0);
 						xmm7 = _mm_add_epi64(xmm7, xmm6);
@@ -244,10 +246,10 @@
 				}
 				else { /* order == 9 */
 					__m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm6, xmm7;
-					xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
-					xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));
-					xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4));
-					xmm3 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6));
+					xmm0 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+0));
+					xmm1 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+2));
+					xmm2 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+4));
+					xmm3 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+6));
 					xmm4 = _mm_cvtsi32_si128(qlp_coeff[8]);
 
 					xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0));
@@ -263,28 +265,28 @@
 
 						//sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
 						//sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-8));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-8));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epi32(xmm6, xmm3);
 						xmm7 = _mm_add_epi64(xmm7, xmm6);
 
 						//sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
 						//sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-6));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-6));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epi32(xmm6, xmm2);
 						xmm7 = _mm_add_epi64(xmm7, xmm6);
 
 						//sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
 						//sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-4));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epi32(xmm6, xmm1);
 						xmm7 = _mm_add_epi64(xmm7, xmm6);
 
 						//sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						//sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-2));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epi32(xmm6, xmm0);
 						xmm7 = _mm_add_epi64(xmm7, xmm6);
@@ -299,10 +301,10 @@
 			if(order > 6) { /* order == 7, 8 */
 				if(order == 8) {
 					__m128i xmm0, xmm1, xmm2, xmm3, xmm6, xmm7;
-					xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
-					xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));
-					xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4));
-					xmm3 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6));
+					xmm0 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+0));
+					xmm1 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+2));
+					xmm2 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+4));
+					xmm3 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+6));
 
 					xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0));
 					xmm1 = _mm_shuffle_epi32(xmm1, _MM_SHUFFLE(3,1,2,0));
@@ -313,27 +315,27 @@
 						//sum = 0;
 						//sum += qlp_coeff[7] * (FLAC__int64)data[i-8];
 						//sum += qlp_coeff[6] * (FLAC__int64)data[i-7];
-						xmm7 = _mm_loadl_epi64((const __m128i*)(data+i-8));
+						xmm7 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-8));
 						xmm7 = _mm_shuffle_epi32(xmm7, _MM_SHUFFLE(2,0,3,1));
 						xmm7 = _mm_mul_epi32(xmm7, xmm3);
 
 						//sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
 						//sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-6));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-6));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epi32(xmm6, xmm2);
 						xmm7 = _mm_add_epi64(xmm7, xmm6);
 
 						//sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
 						//sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-4));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epi32(xmm6, xmm1);
 						xmm7 = _mm_add_epi64(xmm7, xmm6);
 
 						//sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						//sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-2));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epi32(xmm6, xmm0);
 						xmm7 = _mm_add_epi64(xmm7, xmm6);
@@ -344,9 +346,9 @@
 				}
 				else { /* order == 7 */
 					__m128i xmm0, xmm1, xmm2, xmm3, xmm6, xmm7;
-					xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
-					xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));
-					xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4));
+					xmm0 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+0));
+					xmm1 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+2));
+					xmm2 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+4));
 					xmm3 = _mm_cvtsi32_si128(qlp_coeff[6]);
 
 					xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0));
@@ -361,21 +363,21 @@
 
 						//sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
 						//sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-6));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-6));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epi32(xmm6, xmm2);
 						xmm7 = _mm_add_epi64(xmm7, xmm6);
 
 						//sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
 						//sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-4));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epi32(xmm6, xmm1);
 						xmm7 = _mm_add_epi64(xmm7, xmm6);
 
 						//sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						//sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-2));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epi32(xmm6, xmm0);
 						xmm7 = _mm_add_epi64(xmm7, xmm6);
@@ -388,9 +390,9 @@
 			else { /* order == 5, 6 */
 				if(order == 6) {
 					__m128i xmm0, xmm1, xmm2, xmm6, xmm7;
-					xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
-					xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));
-					xmm2 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4));
+					xmm0 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+0));
+					xmm1 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+2));
+					xmm2 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+4));
 
 					xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0));
 					xmm1 = _mm_shuffle_epi32(xmm1, _MM_SHUFFLE(3,1,2,0));
@@ -400,20 +402,20 @@
 						//sum = 0;
 						//sum += qlp_coeff[5] * (FLAC__int64)data[i-6];
 						//sum += qlp_coeff[4] * (FLAC__int64)data[i-5];
-						xmm7 = _mm_loadl_epi64((const __m128i*)(data+i-6));
+						xmm7 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-6));
 						xmm7 = _mm_shuffle_epi32(xmm7, _MM_SHUFFLE(2,0,3,1));
 						xmm7 = _mm_mul_epi32(xmm7, xmm2);
 
 						//sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
 						//sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-4));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epi32(xmm6, xmm1);
 						xmm7 = _mm_add_epi64(xmm7, xmm6);
 
 						//sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						//sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-2));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epi32(xmm6, xmm0);
 						xmm7 = _mm_add_epi64(xmm7, xmm6);
@@ -424,8 +426,8 @@
 				}
 				else { /* order == 5 */
 					__m128i xmm0, xmm1, xmm2, xmm6, xmm7;
-					xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
-					xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));
+					xmm0 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+0));
+					xmm1 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+2));
 					xmm2 = _mm_cvtsi32_si128(qlp_coeff[4]);
 
 					xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0));
@@ -439,14 +441,14 @@
 
 						//sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
 						//sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-4));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-4));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epi32(xmm6, xmm1);
 						xmm7 = _mm_add_epi64(xmm7, xmm6);
 
 						//sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						//sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-2));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epi32(xmm6, xmm0);
 						xmm7 = _mm_add_epi64(xmm7, xmm6);
@@ -461,8 +463,8 @@
 			if(order > 2) { /* order == 3, 4 */
 				if(order == 4) {
 					__m128i xmm0, xmm1, xmm6, xmm7;
-					xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
-					xmm1 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));
+					xmm0 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+0));
+					xmm1 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+2));
 
 					xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0));
 					xmm1 = _mm_shuffle_epi32(xmm1, _MM_SHUFFLE(3,1,2,0));
@@ -471,13 +473,13 @@
 						//sum = 0;
 						//sum += qlp_coeff[3] * (FLAC__int64)data[i-4];
 						//sum += qlp_coeff[2] * (FLAC__int64)data[i-3];
-						xmm7 = _mm_loadl_epi64((const __m128i*)(data+i-4));
+						xmm7 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-4));
 						xmm7 = _mm_shuffle_epi32(xmm7, _MM_SHUFFLE(2,0,3,1));
 						xmm7 = _mm_mul_epi32(xmm7, xmm1);
 
 						//sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						//sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-2));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epi32(xmm6, xmm0);
 						xmm7 = _mm_add_epi64(xmm7, xmm6);
@@ -488,7 +490,7 @@
 				}
 				else { /* order == 3 */
 					__m128i xmm0, xmm1, xmm6, xmm7;
-					xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
+					xmm0 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+0));
 					xmm1 = _mm_cvtsi32_si128(qlp_coeff[2]);
 
 					xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0));
@@ -501,7 +503,7 @@
 
 						//sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						//sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						xmm6 = _mm_loadl_epi64((const __m128i*)(data+i-2));
+						xmm6 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-2));
 						xmm6 = _mm_shuffle_epi32(xmm6, _MM_SHUFFLE(2,0,3,1));
 						xmm6 = _mm_mul_epi32(xmm6, xmm0);
 						xmm7 = _mm_add_epi64(xmm7, xmm6);
@@ -514,14 +516,14 @@
 			else { /* order == 1, 2 */
 				if(order == 2) {
 					__m128i xmm0, xmm7;
-					xmm0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
+					xmm0 = _mm_loadl_epi64((const __m128i*)(const void*)(qlp_coeff+0));
 					xmm0 = _mm_shuffle_epi32(xmm0, _MM_SHUFFLE(3,1,2,0));
 
 					for(i = 0; i < (int)data_len; i++) {
 						//sum = 0;
 						//sum += qlp_coeff[1] * (FLAC__int64)data[i-2];
 						//sum += qlp_coeff[0] * (FLAC__int64)data[i-1];
-						xmm7 = _mm_loadl_epi64((const __m128i*)(data+i-2));
+						xmm7 = _mm_loadl_epi64((const __m128i*)(const void*)(data+i-2));
 						xmm7 = _mm_shuffle_epi32(xmm7, _MM_SHUFFLE(2,0,3,1));
 						xmm7 = _mm_mul_epi32(xmm7, xmm0);
 
@@ -548,25 +550,25 @@
 		for(i = 0; i < (int)data_len; i++) {
 			sum = 0;
 			switch(order) {
-				case 32: sum += qlp_coeff[31] * (FLAC__int64)data[i-32];
-				case 31: sum += qlp_coeff[30] * (FLAC__int64)data[i-31];
-				case 30: sum += qlp_coeff[29] * (FLAC__int64)data[i-30];
-				case 29: sum += qlp_coeff[28] * (FLAC__int64)data[i-29];
-				case 28: sum += qlp_coeff[27] * (FLAC__int64)data[i-28];
-				case 27: sum += qlp_coeff[26] * (FLAC__int64)data[i-27];
-				case 26: sum += qlp_coeff[25] * (FLAC__int64)data[i-26];
-				case 25: sum += qlp_coeff[24] * (FLAC__int64)data[i-25];
-				case 24: sum += qlp_coeff[23] * (FLAC__int64)data[i-24];
-				case 23: sum += qlp_coeff[22] * (FLAC__int64)data[i-23];
-				case 22: sum += qlp_coeff[21] * (FLAC__int64)data[i-22];
-				case 21: sum += qlp_coeff[20] * (FLAC__int64)data[i-21];
-				case 20: sum += qlp_coeff[19] * (FLAC__int64)data[i-20];
-				case 19: sum += qlp_coeff[18] * (FLAC__int64)data[i-19];
-				case 18: sum += qlp_coeff[17] * (FLAC__int64)data[i-18];
-				case 17: sum += qlp_coeff[16] * (FLAC__int64)data[i-17];
-				case 16: sum += qlp_coeff[15] * (FLAC__int64)data[i-16];
-				case 15: sum += qlp_coeff[14] * (FLAC__int64)data[i-15];
-				case 14: sum += qlp_coeff[13] * (FLAC__int64)data[i-14];
+				case 32: sum += qlp_coeff[31] * (FLAC__int64)data[i-32]; /* Falls through. */
+				case 31: sum += qlp_coeff[30] * (FLAC__int64)data[i-31]; /* Falls through. */
+				case 30: sum += qlp_coeff[29] * (FLAC__int64)data[i-30]; /* Falls through. */
+				case 29: sum += qlp_coeff[28] * (FLAC__int64)data[i-29]; /* Falls through. */
+				case 28: sum += qlp_coeff[27] * (FLAC__int64)data[i-28]; /* Falls through. */
+				case 27: sum += qlp_coeff[26] * (FLAC__int64)data[i-27]; /* Falls through. */
+				case 26: sum += qlp_coeff[25] * (FLAC__int64)data[i-26]; /* Falls through. */
+				case 25: sum += qlp_coeff[24] * (FLAC__int64)data[i-25]; /* Falls through. */
+				case 24: sum += qlp_coeff[23] * (FLAC__int64)data[i-24]; /* Falls through. */
+				case 23: sum += qlp_coeff[22] * (FLAC__int64)data[i-23]; /* Falls through. */
+				case 22: sum += qlp_coeff[21] * (FLAC__int64)data[i-22]; /* Falls through. */
+				case 21: sum += qlp_coeff[20] * (FLAC__int64)data[i-21]; /* Falls through. */
+				case 20: sum += qlp_coeff[19] * (FLAC__int64)data[i-20]; /* Falls through. */
+				case 19: sum += qlp_coeff[18] * (FLAC__int64)data[i-19]; /* Falls through. */
+				case 18: sum += qlp_coeff[17] * (FLAC__int64)data[i-18]; /* Falls through. */
+				case 17: sum += qlp_coeff[16] * (FLAC__int64)data[i-17]; /* Falls through. */
+				case 16: sum += qlp_coeff[15] * (FLAC__int64)data[i-16]; /* Falls through. */
+				case 15: sum += qlp_coeff[14] * (FLAC__int64)data[i-15]; /* Falls through. */
+				case 14: sum += qlp_coeff[13] * (FLAC__int64)data[i-14]; /* Falls through. */
 				case 13: sum += qlp_coeff[12] * (FLAC__int64)data[i-13];
 				         sum += qlp_coeff[11] * (FLAC__int64)data[i-12];
 				         sum += qlp_coeff[10] * (FLAC__int64)data[i-11];
@@ -586,378 +588,14 @@
 	}
 }
 
-FLAC__SSE_TARGET("sse4.1")
-void FLAC__lpc_restore_signal_wide_intrin_sse41(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[])
-{
-	int i;
-	__m128i cnt = _mm_cvtsi32_si128(lp_quantization);
-
-	if (!data_len)
-		return;
-
-	FLAC__ASSERT(order > 0);
-	FLAC__ASSERT(order <= 32);
-	FLAC__ASSERT(lp_quantization <= 32); /* there's no _mm_sra_epi64() so we have to use _mm_srl_epi64() */
-
-	if(order <= 12) {
-		if(order > 8) { /* order == 9, 10, 11, 12 */
-			if(order > 10) { /* order == 11, 12 */
-				__m128i qlp[6], dat[6];
-				__m128i summ, temp;
-				qlp[0] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));	// 0  0  q[1]  q[0]
-				qlp[1] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));	// 0  0  q[3]  q[2]
-				qlp[2] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4));	// 0  0  q[5]  q[4]
-				qlp[3] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6));	// 0  0  q[7]  q[6]
-				qlp[4] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+8));	// 0  0  q[9]  q[8]
-				if (order == 12)
-					qlp[5] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+10));	// 0  0  q[11] q[10]
-				else
-					qlp[5] = _mm_cvtsi32_si128(qlp_coeff[10]);					// 0  0  0     q[10]
-
-				qlp[0] = _mm_shuffle_epi32(qlp[0], _MM_SHUFFLE(2,0,3,1));	// 0  q[0]  0  q[1]
-				qlp[1] = _mm_shuffle_epi32(qlp[1], _MM_SHUFFLE(2,0,3,1));	// 0  q[2]  0  q[3]
-				qlp[2] = _mm_shuffle_epi32(qlp[2], _MM_SHUFFLE(2,0,3,1));	// 0  q[4]  0  q[5]
-				qlp[3] = _mm_shuffle_epi32(qlp[3], _MM_SHUFFLE(2,0,3,1));	// 0  q[5]  0  q[7]
-				qlp[4] = _mm_shuffle_epi32(qlp[4], _MM_SHUFFLE(2,0,3,1));	// 0  q[8]  0  q[9]
-				qlp[5] = _mm_shuffle_epi32(qlp[5], _MM_SHUFFLE(2,0,3,1));	// 0  q[10] 0  q[11]
-
-				dat[5] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-12)));	// ?  d[i-11]  ?  d[i-12]
-				dat[4] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-10)));	// ?  d[i-9]   ?  d[i-10]
-				dat[3] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-8 )));	// ?  d[i-7]   ?  d[i-8]
-				dat[2] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-6 )));	// ?  d[i-5]   ?  d[i-6]
-				dat[1] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-4 )));	// ?  d[i-3]   ?  d[i-4]
-				dat[0] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-2 )));	// ?  d[i-1]   ?  d[i-2]
-
-				summ =                     _mm_mul_epi32(dat[5], qlp[5]) ;
-				summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[4], qlp[4]));
-				summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[3], qlp[3]));
-				summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[2], qlp[2]));
-				summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[1], qlp[1]));
-				summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[0], qlp[0]));
-
-				summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8));	// ?_64  sum_64
-				summ = _mm_srl_epi64(summ, cnt);						// ?_64  (sum >> lp_quantization)_64  ==  ?_32  ?_32  ?_32  (sum >> lp_quantization)_32
-				temp = _mm_cvtsi32_si128(residual[0]);					// 0  0  0  r[i]
-				temp = _mm_add_epi32(temp, summ);						// ?  ?  ?  d[i]
-				data[0] = _mm_cvtsi128_si32(temp);
-
-				for(i = 1; i < (int)data_len; i++) {
-					dat[5] = _mm_alignr_epi8(dat[4], dat[5], 8);	//  ?  d[i-10] ?  d[i-11]
-					dat[4] = _mm_alignr_epi8(dat[3], dat[4], 8);	//  ?  d[i-8]  ?  d[i-9]
-					dat[3] = _mm_alignr_epi8(dat[2], dat[3], 8);	//  ?  d[i-6]  ?  d[i-7]
-					dat[2] = _mm_alignr_epi8(dat[1], dat[2], 8);	//  ?  d[i-4]  ?  d[i-5]
-					dat[1] = _mm_alignr_epi8(dat[0], dat[1], 8);	//  ?  d[i-2]  ?  d[i-3]
-					dat[0] = _mm_alignr_epi8(temp,   dat[0], 8);	//  ?  d[i  ]  ?  d[i-1]
-
-					summ =                     _mm_mul_epi32(dat[5], qlp[5]) ;
-					summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[4], qlp[4]));
-					summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[3], qlp[3]));
-					summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[2], qlp[2]));
-					summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[1], qlp[1]));
-					summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[0], qlp[0]));
-
-					summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8));	// ?_64  sum_64
-					summ = _mm_srl_epi64(summ, cnt);						// ?_64  (sum >> lp_quantization)_64  ==  ?_32  ?_32  ?_32  (sum >> lp_quantization)_32
-					temp = _mm_cvtsi32_si128(residual[i]);					// 0  0  0  r[i]
-					temp = _mm_add_epi32(temp, summ);						// ?  ?  ?  d[i]
-					data[i] = _mm_cvtsi128_si32(temp);
-				}
-			}
-			else { /* order == 9, 10 */
-				__m128i qlp[5], dat[5];
-				__m128i summ, temp;
-				qlp[0] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
-				qlp[1] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));
-				qlp[2] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4));
-				qlp[3] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6));
-				if (order == 10)
-					qlp[4] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+8));
-				else
-					qlp[4] = _mm_cvtsi32_si128(qlp_coeff[8]);
-
-				qlp[0] = _mm_shuffle_epi32(qlp[0], _MM_SHUFFLE(2,0,3,1));
-				qlp[1] = _mm_shuffle_epi32(qlp[1], _MM_SHUFFLE(2,0,3,1));
-				qlp[2] = _mm_shuffle_epi32(qlp[2], _MM_SHUFFLE(2,0,3,1));
-				qlp[3] = _mm_shuffle_epi32(qlp[3], _MM_SHUFFLE(2,0,3,1));
-				qlp[4] = _mm_shuffle_epi32(qlp[4], _MM_SHUFFLE(2,0,3,1));
-
-				dat[4] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-10)));
-				dat[3] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-8 )));
-				dat[2] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-6 )));
-				dat[1] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-4 )));
-				dat[0] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-2 )));
-
-				summ =                     _mm_mul_epi32(dat[4], qlp[4]) ;
-				summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[3], qlp[3]));
-				summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[2], qlp[2]));
-				summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[1], qlp[1]));
-				summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[0], qlp[0]));
-
-				summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8));
-				summ = _mm_srl_epi64(summ, cnt);
-				temp = _mm_cvtsi32_si128(residual[0]);
-				temp = _mm_add_epi32(temp, summ);
-				data[0] = _mm_cvtsi128_si32(temp);
-
-				for(i = 1; i < (int)data_len; i++) {
-					dat[4] = _mm_alignr_epi8(dat[3], dat[4], 8);
-					dat[3] = _mm_alignr_epi8(dat[2], dat[3], 8);
-					dat[2] = _mm_alignr_epi8(dat[1], dat[2], 8);
-					dat[1] = _mm_alignr_epi8(dat[0], dat[1], 8);
-					dat[0] = _mm_alignr_epi8(temp,   dat[0], 8);
-
-					summ =                     _mm_mul_epi32(dat[4], qlp[4]) ;
-					summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[3], qlp[3]));
-					summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[2], qlp[2]));
-					summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[1], qlp[1]));
-					summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[0], qlp[0]));
-
-					summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8));
-					summ = _mm_srl_epi64(summ, cnt);
-					temp = _mm_cvtsi32_si128(residual[i]);
-					temp = _mm_add_epi32(temp, summ);
-					data[i] = _mm_cvtsi128_si32(temp);
-				}
-			}
-		}
-		else if(order > 4) { /* order == 5, 6, 7, 8 */
-			if(order > 6) { /* order == 7, 8 */
-				__m128i qlp[4], dat[4];
-				__m128i summ, temp;
-				qlp[0] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
-				qlp[1] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));
-				qlp[2] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4));
-				if (order == 8)
-					qlp[3] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+6));
-				else
-					qlp[3] = _mm_cvtsi32_si128(qlp_coeff[6]);
-
-				qlp[0] = _mm_shuffle_epi32(qlp[0], _MM_SHUFFLE(2,0,3,1));
-				qlp[1] = _mm_shuffle_epi32(qlp[1], _MM_SHUFFLE(2,0,3,1));
-				qlp[2] = _mm_shuffle_epi32(qlp[2], _MM_SHUFFLE(2,0,3,1));
-				qlp[3] = _mm_shuffle_epi32(qlp[3], _MM_SHUFFLE(2,0,3,1));
-
-				dat[3] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-8 )));
-				dat[2] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-6 )));
-				dat[1] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-4 )));
-				dat[0] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-2 )));
-
-				summ =                     _mm_mul_epi32(dat[3], qlp[3]) ;
-				summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[2], qlp[2]));
-				summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[1], qlp[1]));
-				summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[0], qlp[0]));
-
-				summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8));
-				summ = _mm_srl_epi64(summ, cnt);
-				temp = _mm_cvtsi32_si128(residual[0]);
-				temp = _mm_add_epi32(temp, summ);
-				data[0] = _mm_cvtsi128_si32(temp);
-
-				for(i = 1; i < (int)data_len; i++) {
-					dat[3] = _mm_alignr_epi8(dat[2], dat[3], 8);
-					dat[2] = _mm_alignr_epi8(dat[1], dat[2], 8);
-					dat[1] = _mm_alignr_epi8(dat[0], dat[1], 8);
-					dat[0] = _mm_alignr_epi8(temp,   dat[0], 8);
-
-					summ =                     _mm_mul_epi32(dat[3], qlp[3]) ;
-					summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[2], qlp[2]));
-					summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[1], qlp[1]));
-					summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[0], qlp[0]));
-
-					summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8));
-					summ = _mm_srl_epi64(summ, cnt);
-					temp = _mm_cvtsi32_si128(residual[i]);
-					temp = _mm_add_epi32(temp, summ);
-					data[i] = _mm_cvtsi128_si32(temp);
-				}
-			}
-			else { /* order == 5, 6 */
-				__m128i qlp[3], dat[3];
-				__m128i summ, temp;
-				qlp[0] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
-				qlp[1] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));
-				if (order == 6)
-					qlp[2] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+4));
-				else
-					qlp[2] = _mm_cvtsi32_si128(qlp_coeff[4]);
-
-				qlp[0] = _mm_shuffle_epi32(qlp[0], _MM_SHUFFLE(2,0,3,1));
-				qlp[1] = _mm_shuffle_epi32(qlp[1], _MM_SHUFFLE(2,0,3,1));
-				qlp[2] = _mm_shuffle_epi32(qlp[2], _MM_SHUFFLE(2,0,3,1));
-
-				dat[2] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-6 )));
-				dat[1] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-4 )));
-				dat[0] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-2 )));
-
-				summ =                     _mm_mul_epi32(dat[2], qlp[2]) ;
-				summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[1], qlp[1]));
-				summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[0], qlp[0]));
-
-				summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8));
-				summ = _mm_srl_epi64(summ, cnt);
-				temp = _mm_cvtsi32_si128(residual[0]);
-				temp = _mm_add_epi32(temp, summ);
-				data[0] = _mm_cvtsi128_si32(temp);
-
-				for(i = 1; i < (int)data_len; i++) {
-					dat[2] = _mm_alignr_epi8(dat[1], dat[2], 8);
-					dat[1] = _mm_alignr_epi8(dat[0], dat[1], 8);
-					dat[0] = _mm_alignr_epi8(temp,   dat[0], 8);
-
-					summ =                     _mm_mul_epi32(dat[2], qlp[2]) ;
-					summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[1], qlp[1]));
-					summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[0], qlp[0]));
-
-					summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8));
-					summ = _mm_srl_epi64(summ, cnt);
-					temp = _mm_cvtsi32_si128(residual[i]);
-					temp = _mm_add_epi32(temp, summ);
-					data[i] = _mm_cvtsi128_si32(temp);
-				}
-			}
-		}
-		else { /* order == 1, 2, 3, 4 */
-			if(order > 2) { /* order == 3, 4 */
-				__m128i qlp[2], dat[2];
-				__m128i summ, temp;
-				qlp[0] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+0));
-				if (order == 4)
-					qlp[1] = _mm_loadl_epi64((const __m128i*)(qlp_coeff+2));
-				else
-					qlp[1] = _mm_cvtsi32_si128(qlp_coeff[2]);
-
-				qlp[0] = _mm_shuffle_epi32(qlp[0], _MM_SHUFFLE(2,0,3,1));
-				qlp[1] = _mm_shuffle_epi32(qlp[1], _MM_SHUFFLE(2,0,3,1));
-
-				dat[1] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-4 )));
-				dat[0] = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-2 )));
-
-				summ =                     _mm_mul_epi32(dat[1], qlp[1]) ;
-				summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[0], qlp[0]));
-
-				summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8));
-				summ = _mm_srl_epi64(summ, cnt);
-				temp = _mm_cvtsi32_si128(residual[0]);
-				temp = _mm_add_epi32(temp, summ);
-				data[0] = _mm_cvtsi128_si32(temp);
-
-				for(i = 1; i < (int)data_len; i++) {
-					dat[1] = _mm_alignr_epi8(dat[0], dat[1], 8);
-					dat[0] = _mm_alignr_epi8(temp,   dat[0], 8);
-
-					summ =                     _mm_mul_epi32(dat[1], qlp[1]) ;
-					summ = _mm_add_epi64(summ, _mm_mul_epi32(dat[0], qlp[0]));
-
-					summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8));
-					summ = _mm_srl_epi64(summ, cnt);
-					temp = _mm_cvtsi32_si128(residual[i]);
-					temp = _mm_add_epi32(temp, summ);
-					data[i] = _mm_cvtsi128_si32(temp);
-				}
-			}
-			else { /* order == 1, 2 */
-				if(order == 2) {
-					__m128i qlp0, dat0;
-					__m128i summ, temp;
-					qlp0 = _mm_loadl_epi64((const __m128i*)(qlp_coeff));
-					qlp0 = _mm_shuffle_epi32(qlp0, _MM_SHUFFLE(2,0,3,1));
-
-					dat0 = _mm_cvtepu32_epi64(_mm_loadl_epi64((const __m128i*)(data-2 )));
-
-					summ = _mm_mul_epi32(dat0, qlp0) ;
-
-					summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8));
-					summ = _mm_srl_epi64(summ, cnt);
-					temp = _mm_cvtsi32_si128(residual[0]);
-					temp = _mm_add_epi32(temp, summ);
-					data[0] = _mm_cvtsi128_si32(temp);
-
-					for(i = 1; i < (int)data_len; i++) {
-						dat0 = _mm_alignr_epi8(temp, dat0, 8);
-
-						summ = _mm_mul_epi32(dat0, qlp0) ;
-
-						summ = _mm_add_epi64(summ, _mm_srli_si128(summ, 8));
-						summ = _mm_srl_epi64(summ, cnt);
-						temp = _mm_cvtsi32_si128(residual[i]);
-						temp = _mm_add_epi32(temp, summ);
-						data[i] = _mm_cvtsi128_si32(temp);
-					}
-				}
-				else { /* order == 1 */
-					__m128i qlp0;
-					__m128i summ, temp;
-					qlp0 = _mm_cvtsi32_si128(qlp_coeff[0]);
-					temp = _mm_cvtsi32_si128(data[-1]);
-
-					summ = _mm_mul_epi32(temp, qlp0);
-					summ = _mm_srl_epi64(summ, cnt);
-					temp = _mm_cvtsi32_si128(residual[0]);
-					temp = _mm_add_epi32(temp, summ);
-					data[0] = _mm_cvtsi128_si32(temp);
-
-					for(i = 1; i < (int)data_len; i++) {
-						summ = _mm_mul_epi32(temp, qlp0) ;
-						summ = _mm_srl_epi64(summ, cnt);
-						temp = _mm_cvtsi32_si128(residual[i]);
-						temp = _mm_add_epi32(temp, summ);
-						data[i] = _mm_cvtsi128_si32(temp);
-					}
-				}
-			}
-		}
-	}
-	else { /* order > 12 */
-		FLAC__int64 sum;
-		for(i = 0; i < (int)data_len; i++) {
-			sum = 0;
-			switch(order) {
-				case 32: sum += qlp_coeff[31] * (FLAC__int64)data[i-32];
-				case 31: sum += qlp_coeff[30] * (FLAC__int64)data[i-31];
-				case 30: sum += qlp_coeff[29] * (FLAC__int64)data[i-30];
-				case 29: sum += qlp_coeff[28] * (FLAC__int64)data[i-29];
-				case 28: sum += qlp_coeff[27] * (FLAC__int64)data[i-28];
-				case 27: sum += qlp_coeff[26] * (FLAC__int64)data[i-27];
-				case 26: sum += qlp_coeff[25] * (FLAC__int64)data[i-26];
-				case 25: sum += qlp_coeff[24] * (FLAC__int64)data[i-25];
-				case 24: sum += qlp_coeff[23] * (FLAC__int64)data[i-24];
-				case 23: sum += qlp_coeff[22] * (FLAC__int64)data[i-23];
-				case 22: sum += qlp_coeff[21] * (FLAC__int64)data[i-22];
-				case 21: sum += qlp_coeff[20] * (FLAC__int64)data[i-21];
-				case 20: sum += qlp_coeff[19] * (FLAC__int64)data[i-20];
-				case 19: sum += qlp_coeff[18] * (FLAC__int64)data[i-19];
-				case 18: sum += qlp_coeff[17] * (FLAC__int64)data[i-18];
-				case 17: sum += qlp_coeff[16] * (FLAC__int64)data[i-17];
-				case 16: sum += qlp_coeff[15] * (FLAC__int64)data[i-16];
-				case 15: sum += qlp_coeff[14] * (FLAC__int64)data[i-15];
-				case 14: sum += qlp_coeff[13] * (FLAC__int64)data[i-14];
-				case 13: sum += qlp_coeff[12] * (FLAC__int64)data[i-13];
-				         sum += qlp_coeff[11] * (FLAC__int64)data[i-12];
-				         sum += qlp_coeff[10] * (FLAC__int64)data[i-11];
-				         sum += qlp_coeff[ 9] * (FLAC__int64)data[i-10];
-				         sum += qlp_coeff[ 8] * (FLAC__int64)data[i- 9];
-				         sum += qlp_coeff[ 7] * (FLAC__int64)data[i- 8];
-				         sum += qlp_coeff[ 6] * (FLAC__int64)data[i- 7];
-				         sum += qlp_coeff[ 5] * (FLAC__int64)data[i- 6];
-				         sum += qlp_coeff[ 4] * (FLAC__int64)data[i- 5];
-				         sum += qlp_coeff[ 3] * (FLAC__int64)data[i- 4];
-				         sum += qlp_coeff[ 2] * (FLAC__int64)data[i- 3];
-				         sum += qlp_coeff[ 1] * (FLAC__int64)data[i- 2];
-				         sum += qlp_coeff[ 0] * (FLAC__int64)data[i- 1];
-			}
-			data[i] = residual[i] + (FLAC__int32)(sum >> lp_quantization);
-		}
-	}
-}
-
 #endif /* defined FLAC__CPU_IA32 */
 
 FLAC__SSE_TARGET("sse4.1")
-void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse41(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[])
+void FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse41(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[])
 {
 	int i;
 	FLAC__int32 sum;
-	__m128i cnt = _mm_cvtsi32_si128(lp_quantization);
+	const __m128i cnt = _mm_cvtsi32_si128(lp_quantization);
 
 	FLAC__ASSERT(order > 0);
 	FLAC__ASSERT(order <= 32);
@@ -982,20 +620,20 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m128i summ, mull;
-						summ = _mm_mullo_epi32(q11, _mm_loadu_si128((const __m128i*)(data+i-12)));
-						mull = _mm_mullo_epi32(q10, _mm_loadu_si128((const __m128i*)(data+i-11))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q9, _mm_loadu_si128((const __m128i*)(data+i-10))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q8, _mm_loadu_si128((const __m128i*)(data+i-9))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q7, _mm_loadu_si128((const __m128i*)(data+i-8))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q6, _mm_loadu_si128((const __m128i*)(data+i-7))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
+						summ = _mm_mullo_epi32(q11, _mm_loadu_si128((const __m128i*)(const void*)(data+i-12)));
+						mull = _mm_mullo_epi32(q10, _mm_loadu_si128((const __m128i*)(const void*)(data+i-11))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q9, _mm_loadu_si128((const __m128i*)(const void*)(data+i-10))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q8, _mm_loadu_si128((const __m128i*)(const void*)(data+i-9))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q7, _mm_loadu_si128((const __m128i*)(const void*)(data+i-8))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q6, _mm_loadu_si128((const __m128i*)(const void*)(data+i-7))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q5, _mm_loadu_si128((const __m128i*)(const void*)(data+i-6))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q4, _mm_loadu_si128((const __m128i*)(const void*)(data+i-5))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q3, _mm_loadu_si128((const __m128i*)(const void*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q2, _mm_loadu_si128((const __m128i*)(const void*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(const void*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(const void*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
 						summ = _mm_sra_epi32(summ, cnt);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), summ));
 					}
 				}
 				else { /* order == 11 */
@@ -1014,19 +652,19 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m128i summ, mull;
-						summ = _mm_mullo_epi32(q10, _mm_loadu_si128((const __m128i*)(data+i-11)));
-						mull = _mm_mullo_epi32(q9, _mm_loadu_si128((const __m128i*)(data+i-10))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q8, _mm_loadu_si128((const __m128i*)(data+i-9))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q7, _mm_loadu_si128((const __m128i*)(data+i-8))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q6, _mm_loadu_si128((const __m128i*)(data+i-7))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
+						summ = _mm_mullo_epi32(q10, _mm_loadu_si128((const __m128i*)(const void*)(data+i-11)));
+						mull = _mm_mullo_epi32(q9, _mm_loadu_si128((const __m128i*)(const void*)(data+i-10))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q8, _mm_loadu_si128((const __m128i*)(const void*)(data+i-9))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q7, _mm_loadu_si128((const __m128i*)(const void*)(data+i-8))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q6, _mm_loadu_si128((const __m128i*)(const void*)(data+i-7))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q5, _mm_loadu_si128((const __m128i*)(const void*)(data+i-6))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q4, _mm_loadu_si128((const __m128i*)(const void*)(data+i-5))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q3, _mm_loadu_si128((const __m128i*)(const void*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q2, _mm_loadu_si128((const __m128i*)(const void*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(const void*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(const void*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
 						summ = _mm_sra_epi32(summ, cnt);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), summ));
 					}
 				}
 			}
@@ -1046,18 +684,18 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m128i summ, mull;
-						summ = _mm_mullo_epi32(q9, _mm_loadu_si128((const __m128i*)(data+i-10)));
-						mull = _mm_mullo_epi32(q8, _mm_loadu_si128((const __m128i*)(data+i-9))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q7, _mm_loadu_si128((const __m128i*)(data+i-8))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q6, _mm_loadu_si128((const __m128i*)(data+i-7))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
+						summ = _mm_mullo_epi32(q9, _mm_loadu_si128((const __m128i*)(const void*)(data+i-10)));
+						mull = _mm_mullo_epi32(q8, _mm_loadu_si128((const __m128i*)(const void*)(data+i-9))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q7, _mm_loadu_si128((const __m128i*)(const void*)(data+i-8))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q6, _mm_loadu_si128((const __m128i*)(const void*)(data+i-7))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q5, _mm_loadu_si128((const __m128i*)(const void*)(data+i-6))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q4, _mm_loadu_si128((const __m128i*)(const void*)(data+i-5))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q3, _mm_loadu_si128((const __m128i*)(const void*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q2, _mm_loadu_si128((const __m128i*)(const void*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(const void*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(const void*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
 						summ = _mm_sra_epi32(summ, cnt);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), summ));
 					}
 				}
 				else { /* order == 9 */
@@ -1074,17 +712,17 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m128i summ, mull;
-						summ = _mm_mullo_epi32(q8, _mm_loadu_si128((const __m128i*)(data+i-9)));
-						mull = _mm_mullo_epi32(q7, _mm_loadu_si128((const __m128i*)(data+i-8))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q6, _mm_loadu_si128((const __m128i*)(data+i-7))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
+						summ = _mm_mullo_epi32(q8, _mm_loadu_si128((const __m128i*)(const void*)(data+i-9)));
+						mull = _mm_mullo_epi32(q7, _mm_loadu_si128((const __m128i*)(const void*)(data+i-8))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q6, _mm_loadu_si128((const __m128i*)(const void*)(data+i-7))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q5, _mm_loadu_si128((const __m128i*)(const void*)(data+i-6))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q4, _mm_loadu_si128((const __m128i*)(const void*)(data+i-5))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q3, _mm_loadu_si128((const __m128i*)(const void*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q2, _mm_loadu_si128((const __m128i*)(const void*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(const void*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(const void*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
 						summ = _mm_sra_epi32(summ, cnt);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), summ));
 					}
 				}
 			}
@@ -1104,16 +742,16 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m128i summ, mull;
-						summ = _mm_mullo_epi32(q7, _mm_loadu_si128((const __m128i*)(data+i-8)));
-						mull = _mm_mullo_epi32(q6, _mm_loadu_si128((const __m128i*)(data+i-7))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
+						summ = _mm_mullo_epi32(q7, _mm_loadu_si128((const __m128i*)(const void*)(data+i-8)));
+						mull = _mm_mullo_epi32(q6, _mm_loadu_si128((const __m128i*)(const void*)(data+i-7))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q5, _mm_loadu_si128((const __m128i*)(const void*)(data+i-6))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q4, _mm_loadu_si128((const __m128i*)(const void*)(data+i-5))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q3, _mm_loadu_si128((const __m128i*)(const void*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q2, _mm_loadu_si128((const __m128i*)(const void*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(const void*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(const void*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
 						summ = _mm_sra_epi32(summ, cnt);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), summ));
 					}
 				}
 				else { /* order == 7 */
@@ -1128,15 +766,15 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m128i summ, mull;
-						summ = _mm_mullo_epi32(q6, _mm_loadu_si128((const __m128i*)(data+i-7)));
-						mull = _mm_mullo_epi32(q5, _mm_loadu_si128((const __m128i*)(data+i-6))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
+						summ = _mm_mullo_epi32(q6, _mm_loadu_si128((const __m128i*)(const void*)(data+i-7)));
+						mull = _mm_mullo_epi32(q5, _mm_loadu_si128((const __m128i*)(const void*)(data+i-6))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q4, _mm_loadu_si128((const __m128i*)(const void*)(data+i-5))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q3, _mm_loadu_si128((const __m128i*)(const void*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q2, _mm_loadu_si128((const __m128i*)(const void*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(const void*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(const void*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
 						summ = _mm_sra_epi32(summ, cnt);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), summ));
 					}
 				}
 			}
@@ -1152,14 +790,14 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m128i summ, mull;
-						summ = _mm_mullo_epi32(q5, _mm_loadu_si128((const __m128i*)(data+i-6)));
-						mull = _mm_mullo_epi32(q4, _mm_loadu_si128((const __m128i*)(data+i-5))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
+						summ = _mm_mullo_epi32(q5, _mm_loadu_si128((const __m128i*)(const void*)(data+i-6)));
+						mull = _mm_mullo_epi32(q4, _mm_loadu_si128((const __m128i*)(const void*)(data+i-5))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q3, _mm_loadu_si128((const __m128i*)(const void*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q2, _mm_loadu_si128((const __m128i*)(const void*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(const void*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(const void*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
 						summ = _mm_sra_epi32(summ, cnt);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), summ));
 					}
 				}
 				else { /* order == 5 */
@@ -1172,13 +810,13 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m128i summ, mull;
-						summ = _mm_mullo_epi32(q4, _mm_loadu_si128((const __m128i*)(data+i-5)));
-						mull = _mm_mullo_epi32(q3, _mm_loadu_si128((const __m128i*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
+						summ = _mm_mullo_epi32(q4, _mm_loadu_si128((const __m128i*)(const void*)(data+i-5)));
+						mull = _mm_mullo_epi32(q3, _mm_loadu_si128((const __m128i*)(const void*)(data+i-4))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q2, _mm_loadu_si128((const __m128i*)(const void*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(const void*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(const void*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
 						summ = _mm_sra_epi32(summ, cnt);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), summ));
 					}
 				}
 			}
@@ -1194,12 +832,12 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m128i summ, mull;
-						summ = _mm_mullo_epi32(q3, _mm_loadu_si128((const __m128i*)(data+i-4)));
-						mull = _mm_mullo_epi32(q2, _mm_loadu_si128((const __m128i*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
+						summ = _mm_mullo_epi32(q3, _mm_loadu_si128((const __m128i*)(const void*)(data+i-4)));
+						mull = _mm_mullo_epi32(q2, _mm_loadu_si128((const __m128i*)(const void*)(data+i-3))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(const void*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(const void*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
 						summ = _mm_sra_epi32(summ, cnt);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), summ));
 					}
 				}
 				else { /* order == 3 */
@@ -1210,11 +848,11 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m128i summ, mull;
-						summ = _mm_mullo_epi32(q2, _mm_loadu_si128((const __m128i*)(data+i-3)));
-						mull = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
-						mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
+						summ = _mm_mullo_epi32(q2, _mm_loadu_si128((const __m128i*)(const void*)(data+i-3)));
+						mull = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(const void*)(data+i-2))); summ = _mm_add_epi32(summ, mull);
+						mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(const void*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
 						summ = _mm_sra_epi32(summ, cnt);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), summ));
 					}
 				}
 			}
@@ -1226,10 +864,10 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m128i summ, mull;
-						summ = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(data+i-2)));
-						mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
+						summ = _mm_mullo_epi32(q1, _mm_loadu_si128((const __m128i*)(const void*)(data+i-2)));
+						mull = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(const void*)(data+i-1))); summ = _mm_add_epi32(summ, mull);
 						summ = _mm_sra_epi32(summ, cnt);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), summ));
 					}
 				}
 				else { /* order == 1 */
@@ -1238,9 +876,9 @@
 
 					for(i = 0; i < (int)data_len-3; i+=4) {
 						__m128i summ;
-						summ = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(data+i-1)));
+						summ = _mm_mullo_epi32(q0, _mm_loadu_si128((const __m128i*)(const void*)(data+i-1)));
 						summ = _mm_sra_epi32(summ, cnt);
-						_mm_storeu_si128((__m128i*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(data+i)), summ));
+						_mm_storeu_si128((__m128i*)(void*)(residual+i), _mm_sub_epi32(_mm_loadu_si128((const __m128i*)(const void*)(data+i)), summ));
 					}
 				}
 			}
@@ -1248,17 +886,17 @@
 		for(; i < (int)data_len; i++) {
 			sum = 0;
 			switch(order) {
-				case 12: sum += qlp_coeff[11] * data[i-12];
-				case 11: sum += qlp_coeff[10] * data[i-11];
-				case 10: sum += qlp_coeff[ 9] * data[i-10];
-				case 9:  sum += qlp_coeff[ 8] * data[i- 9];
-				case 8:  sum += qlp_coeff[ 7] * data[i- 8];
-				case 7:  sum += qlp_coeff[ 6] * data[i- 7];
-				case 6:  sum += qlp_coeff[ 5] * data[i- 6];
-				case 5:  sum += qlp_coeff[ 4] * data[i- 5];
-				case 4:  sum += qlp_coeff[ 3] * data[i- 4];
-				case 3:  sum += qlp_coeff[ 2] * data[i- 3];
-				case 2:  sum += qlp_coeff[ 1] * data[i- 2];
+				case 12: sum += qlp_coeff[11] * data[i-12]; /* Falls through. */
+				case 11: sum += qlp_coeff[10] * data[i-11]; /* Falls through. */
+				case 10: sum += qlp_coeff[ 9] * data[i-10]; /* Falls through. */
+				case 9:  sum += qlp_coeff[ 8] * data[i- 9]; /* Falls through. */
+				case 8:  sum += qlp_coeff[ 7] * data[i- 8]; /* Falls through. */
+				case 7:  sum += qlp_coeff[ 6] * data[i- 7]; /* Falls through. */
+				case 6:  sum += qlp_coeff[ 5] * data[i- 6]; /* Falls through. */
+				case 5:  sum += qlp_coeff[ 4] * data[i- 5]; /* Falls through. */
+				case 4:  sum += qlp_coeff[ 3] * data[i- 4]; /* Falls through. */
+				case 3:  sum += qlp_coeff[ 2] * data[i- 3]; /* Falls through. */
+				case 2:  sum += qlp_coeff[ 1] * data[i- 2]; /* Falls through. */
 				case 1:  sum += qlp_coeff[ 0] * data[i- 1];
 			}
 			residual[i] = data[i] - (sum >> lp_quantization);
@@ -1268,25 +906,25 @@
 		for(i = 0; i < (int)data_len; i++) {
 			sum = 0;
 			switch(order) {
-				case 32: sum += qlp_coeff[31] * data[i-32];
-				case 31: sum += qlp_coeff[30] * data[i-31];
-				case 30: sum += qlp_coeff[29] * data[i-30];
-				case 29: sum += qlp_coeff[28] * data[i-29];
-				case 28: sum += qlp_coeff[27] * data[i-28];
-				case 27: sum += qlp_coeff[26] * data[i-27];
-				case 26: sum += qlp_coeff[25] * data[i-26];
-				case 25: sum += qlp_coeff[24] * data[i-25];
-				case 24: sum += qlp_coeff[23] * data[i-24];
-				case 23: sum += qlp_coeff[22] * data[i-23];
-				case 22: sum += qlp_coeff[21] * data[i-22];
-				case 21: sum += qlp_coeff[20] * data[i-21];
-				case 20: sum += qlp_coeff[19] * data[i-20];
-				case 19: sum += qlp_coeff[18] * data[i-19];
-				case 18: sum += qlp_coeff[17] * data[i-18];
-				case 17: sum += qlp_coeff[16] * data[i-17];
-				case 16: sum += qlp_coeff[15] * data[i-16];
-				case 15: sum += qlp_coeff[14] * data[i-15];
-				case 14: sum += qlp_coeff[13] * data[i-14];
+				case 32: sum += qlp_coeff[31] * data[i-32]; /* Falls through. */
+				case 31: sum += qlp_coeff[30] * data[i-31]; /* Falls through. */
+				case 30: sum += qlp_coeff[29] * data[i-30]; /* Falls through. */
+				case 29: sum += qlp_coeff[28] * data[i-29]; /* Falls through. */
+				case 28: sum += qlp_coeff[27] * data[i-28]; /* Falls through. */
+				case 27: sum += qlp_coeff[26] * data[i-27]; /* Falls through. */
+				case 26: sum += qlp_coeff[25] * data[i-26]; /* Falls through. */
+				case 25: sum += qlp_coeff[24] * data[i-25]; /* Falls through. */
+				case 24: sum += qlp_coeff[23] * data[i-24]; /* Falls through. */
+				case 23: sum += qlp_coeff[22] * data[i-23]; /* Falls through. */
+				case 22: sum += qlp_coeff[21] * data[i-22]; /* Falls through. */
+				case 21: sum += qlp_coeff[20] * data[i-21]; /* Falls through. */
+				case 20: sum += qlp_coeff[19] * data[i-20]; /* Falls through. */
+				case 19: sum += qlp_coeff[18] * data[i-19]; /* Falls through. */
+				case 18: sum += qlp_coeff[17] * data[i-18]; /* Falls through. */
+				case 17: sum += qlp_coeff[16] * data[i-17]; /* Falls through. */
+				case 16: sum += qlp_coeff[15] * data[i-16]; /* Falls through. */
+				case 15: sum += qlp_coeff[14] * data[i-15]; /* Falls through. */
+				case 14: sum += qlp_coeff[13] * data[i-14]; /* Falls through. */
 				case 13: sum += qlp_coeff[12] * data[i-13];
 				         sum += qlp_coeff[11] * data[i-12];
 				         sum += qlp_coeff[10] * data[i-11];

diff --git a/src/libFLAC/lpc_intrin_vsx.c b/src/libFLAC/lpc_intrin_vsx.c
new file mode 100644
index 0000000..3f823b9
--- /dev/null
+++ b/src/libFLAC/lpc_intrin_vsx.c

@@ -0,0 +1,102 @@
+/* libFLAC - Free Lossless Audio Codec library
+ * Copyright (C) 2000-2009  Josh Coalson
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * - Neither the name of the Xiph.org Foundation nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
+#ifndef FLAC__INTEGER_ONLY_LIBRARY
+#ifndef FLAC__NO_ASM
+#if defined(FLAC__CPU_PPC64) && defined(FLAC__USE_VSX)
+
+#include "private/cpu.h"
+#include "private/lpc.h"
+#include "FLAC/assert.h"
+#include "FLAC/format.h"
+
+#include <altivec.h>
+
+#ifdef FLAC__HAS_TARGET_POWER8
+__attribute__((target("cpu=power8"))) /* per-function target: build this function for POWER8 */
+void FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_14(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[])
+{ /* POWER8 instantiation with MAX_LAG 14; every statement of the body comes from the #include below */
+#undef MAX_LAG /* drop any earlier definition so the #define below is not a redefinition */
+#define MAX_LAG 14 /* must be set before the #include; presumably read by the included body -- confirm in that file */
+#include "deduplication/lpc_compute_autocorrelation_intrin_vsx.c" /* shared body, textually pasted between these braces */
+}
+
+__attribute__((target("cpu=power8"))) /* per-function target: build this function for POWER8 */
+void FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_10(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[])
+{ /* POWER8 instantiation with MAX_LAG 10; every statement of the body comes from the #include below */
+#undef MAX_LAG /* drop any earlier definition so the #define below is not a redefinition */
+#define MAX_LAG 10 /* must be set before the #include; presumably read by the included body -- confirm in that file */
+#include "deduplication/lpc_compute_autocorrelation_intrin_vsx.c" /* shared body, textually pasted between these braces */
+}
+
+__attribute__((target("cpu=power8"))) /* per-function target: build this function for POWER8 */
+void FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_8(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[])
+{ /* POWER8 instantiation with MAX_LAG 8; every statement of the body comes from the #include below */
+#undef MAX_LAG /* drop any earlier definition so the #define below is not a redefinition */
+#define MAX_LAG 8 /* must be set before the #include; presumably read by the included body -- confirm in that file */
+#include "deduplication/lpc_compute_autocorrelation_intrin_vsx.c" /* shared body, textually pasted between these braces */
+}
+#endif /* FLAC__HAS_TARGET_POWER8 */
+
+#ifdef FLAC__HAS_TARGET_POWER9
+__attribute__((target("cpu=power9"))) /* per-function target: build this function for POWER9 */
+void FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_14(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[])
+{ /* POWER9 instantiation with MAX_LAG 14; every statement of the body comes from the #include below */
+#undef MAX_LAG /* drop the value left by the previous instantiation in this file */
+#define MAX_LAG 14 /* must be set before the #include; presumably read by the included body -- confirm in that file */
+#include "deduplication/lpc_compute_autocorrelation_intrin_vsx.c" /* shared body, textually pasted between these braces */
+}
+
+__attribute__((target("cpu=power9"))) /* per-function target: build this function for POWER9 */
+void FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_10(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[])
+{ /* POWER9 instantiation with MAX_LAG 10; every statement of the body comes from the #include below */
+#undef MAX_LAG /* drop the value left by the previous instantiation in this file */
+#define MAX_LAG 10 /* must be set before the #include; presumably read by the included body -- confirm in that file */
+#include "deduplication/lpc_compute_autocorrelation_intrin_vsx.c" /* shared body, textually pasted between these braces */
+}
+
+__attribute__((target("cpu=power9"))) /* per-function target: build this function for POWER9 */
+void FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_8(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[])
+{ /* POWER9 instantiation with MAX_LAG 8; every statement of the body comes from the #include below */
+#undef MAX_LAG /* drop the value left by the previous instantiation in this file */
+#define MAX_LAG 8 /* must be set before the #include; presumably read by the included body -- confirm in that file */
+#include "deduplication/lpc_compute_autocorrelation_intrin_vsx.c" /* shared body, textually pasted between these braces */
+}
+#endif /* FLAC__HAS_TARGET_POWER9 */
+
+#endif /* FLAC__CPU_PPC64 && FLAC__USE_VSX */
+#endif /* FLAC__NO_ASM */
+#endif /* FLAC__INTEGER_ONLY_LIBRARY */

diff --git a/src/libFLAC/md5.c b/src/libFLAC/md5.c
index 58c9d65..09933d7 100644
--- a/src/libFLAC/md5.c
+++ b/src/libFLAC/md5.c

@@ -137,7 +137,7 @@
 
 #if WORDS_BIGENDIAN
 //@@@@@@ OPT: use bswap/intrinsics
-static void byteSwap(FLAC__uint32 *buf, unsigned words)
+static void byteSwap(FLAC__uint32 *buf, uint32_t words)
 {
 	register FLAC__uint32 x;
 	do {
@@ -176,7 +176,7 @@
  * Update context to reflect the concatenation of another buffer full
  * of bytes.
  */
-static void FLAC__MD5Update(FLAC__MD5Context *ctx, FLAC__byte const *buf, unsigned len)
+static void FLAC__MD5Update(FLAC__MD5Context *ctx, FLAC__byte const *buf, uint32_t len)
 {
 	FLAC__uint32 t;
 
@@ -225,7 +225,7 @@
 	ctx->bytes[0] = 0;
 	ctx->bytes[1] = 0;
 
-	ctx->internal_buf.p8= 0;
+	ctx->internal_buf.p8 = 0;
 	ctx->capacity = 0;
 }
 
@@ -263,7 +263,7 @@
 	memcpy(digest, ctx->buf, 16);
 	if (0 != ctx->internal_buf.p8) {
 		free(ctx->internal_buf.p8);
-		ctx->internal_buf.p8= 0;
+		ctx->internal_buf.p8 = 0;
 		ctx->capacity = 0;
 	}
 	memset(ctx, 0, sizeof(*ctx));	/* In case it's sensitive */
@@ -272,13 +272,13 @@
 /*
  * Convert the incoming audio signal to a byte stream
  */
-static void format_input_(FLAC__multibyte *mbuf, const FLAC__int32 * const signal[], unsigned channels, unsigned samples, unsigned bytes_per_sample)
+static void format_input_(FLAC__multibyte *mbuf, const FLAC__int32 * const signal[], uint32_t channels, uint32_t samples, uint32_t bytes_per_sample)
 {
 	FLAC__byte *buf_ = mbuf->p8;
 	FLAC__int16 *buf16 = mbuf->p16;
 	FLAC__int32 *buf32 = mbuf->p32;
 	FLAC__int32 a_word;
-	unsigned channel, sample;
+	uint32_t channel, sample;
 
 	/* Storage in the output buffer, buf, is little endian. */
 
@@ -489,7 +489,7 @@
 /*
  * Convert the incoming audio signal to a byte stream and FLAC__MD5Update it.
  */
-FLAC__bool FLAC__MD5Accumulate(FLAC__MD5Context *ctx, const FLAC__int32 * const signal[], unsigned channels, unsigned samples, unsigned bytes_per_sample)
+FLAC__bool FLAC__MD5Accumulate(FLAC__MD5Context *ctx, const FLAC__int32 * const signal[], uint32_t channels, uint32_t samples, uint32_t bytes_per_sample)
 {
 	const size_t bytes_needed = (size_t)channels * (size_t)samples * (size_t)bytes_per_sample;
 
@@ -500,14 +500,12 @@
 		return false;
 
 	if (ctx->capacity < bytes_needed) {
-		FLAC__byte *tmp = realloc(ctx->internal_buf.p8, bytes_needed);
-		if (0 == tmp) {
-			free(ctx->internal_buf.p8);
-			if (0 == (ctx->internal_buf.p8= safe_malloc_(bytes_needed)))
+		if (0 == (ctx->internal_buf.p8 = safe_realloc_(ctx->internal_buf.p8, bytes_needed))) {
+			if (0 == (ctx->internal_buf.p8 = safe_malloc_(bytes_needed))) {
+				ctx->capacity = 0;
 				return false;
+			}
 		}
-		else
-			ctx->internal_buf.p8= tmp;
 		ctx->capacity = bytes_needed;
 	}
 

diff --git a/src/libFLAC/memory.c b/src/libFLAC/memory.c
index 10ed283..d0aa837 100644
--- a/src/libFLAC/memory.c
+++ b/src/libFLAC/memory.c

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2001-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -118,6 +118,35 @@
 	}
 }
 
+FLAC__bool FLAC__memory_alloc_aligned_int64_array(size_t elements, FLAC__int64 **unaligned_pointer, FLAC__int64 **aligned_pointer)
+{ /* Swaps the caller's FLAC__int64 buffer for a fresh one of `elements` entries (old contents are not copied); returns false and leaves both outputs untouched on failure. */
+	FLAC__int64 *pu; /* value returned by FLAC__memory_alloc_aligned(); stored in *unaligned_pointer, which is the pointer this function free()s on a later call */
+	union { /* union needed to comply with C99 pointer aliasing rules */
+		FLAC__int64 *pa; /* aligned pointer, read back as the typed result */
+		void         *pv; /* aligned pointer alias, the member the allocator writes through */
+	} u;
+
+	FLAC__ASSERT(elements > 0);
+	FLAC__ASSERT(0 != unaligned_pointer);
+	FLAC__ASSERT(0 != aligned_pointer);
+	FLAC__ASSERT(unaligned_pointer != aligned_pointer); /* both outputs are written below, so they must be distinct objects */
+
+	if(elements > SIZE_MAX / sizeof(*pu)) /* overflow check: sizeof(*pu) * elements below would wrap around size_t */
+		return false;
+
+	pu = FLAC__memory_alloc_aligned(sizeof(*pu) * elements, &u.pv); /* size is in bytes; second argument receives the aligned address */
+	if(0 == pu) { /* allocation failed: the caller's existing buffer and pointers stay as they were */
+		return false;
+	}
+	else {
+		if(*unaligned_pointer != 0) /* release the buffer the caller already held only now that the new one exists */
+			free(*unaligned_pointer);
+		*unaligned_pointer = pu;
+		*aligned_pointer = u.pa;
+		return true;
+	}
+}
+
 FLAC__bool FLAC__memory_alloc_aligned_uint64_array(size_t elements, FLAC__uint64 **unaligned_pointer, FLAC__uint64 **aligned_pointer)
 {
 	FLAC__uint64 *pu; /* unaligned pointer */
@@ -147,11 +176,11 @@
 	}
 }
 
-FLAC__bool FLAC__memory_alloc_aligned_unsigned_array(size_t elements, unsigned **unaligned_pointer, unsigned **aligned_pointer)
+FLAC__bool FLAC__memory_alloc_aligned_unsigned_array(size_t elements, uint32_t **unaligned_pointer, uint32_t **aligned_pointer)
 {
-	unsigned *pu; /* unaligned pointer */
+	uint32_t *pu; /* unaligned pointer */
 	union { /* union needed to comply with C99 pointer aliasing rules */
-		unsigned *pa; /* aligned pointer */
+		uint32_t *pa; /* aligned pointer */
 		void     *pv; /* aligned pointer alias */
 	} u;
 

diff --git a/src/libFLAC/metadata_iterators.c b/src/libFLAC/metadata_iterators.c
index 1e28925..168a3fb 100644
--- a/src/libFLAC/metadata_iterators.c
+++ b/src/libFLAC/metadata_iterators.c

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2001-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -62,50 +62,50 @@
  *
  ***************************************************************************/
 
-static void pack_uint32_(FLAC__uint32 val, FLAC__byte *b, unsigned bytes);
-static void pack_uint32_little_endian_(FLAC__uint32 val, FLAC__byte *b, unsigned bytes);
-static void pack_uint64_(FLAC__uint64 val, FLAC__byte *b, unsigned bytes);
-static FLAC__uint32 unpack_uint32_(FLAC__byte *b, unsigned bytes);
-static FLAC__uint32 unpack_uint32_little_endian_(FLAC__byte *b, unsigned bytes);
-static FLAC__uint64 unpack_uint64_(FLAC__byte *b, unsigned bytes);
+static void pack_uint32_(FLAC__uint32 val, FLAC__byte *b, uint32_t bytes);
+static void pack_uint32_little_endian_(FLAC__uint32 val, FLAC__byte *b, uint32_t bytes);
+static void pack_uint64_(FLAC__uint64 val, FLAC__byte *b, uint32_t bytes);
+static FLAC__uint32 unpack_uint32_(FLAC__byte *b, uint32_t bytes);
+static FLAC__uint32 unpack_uint32_little_endian_(FLAC__byte *b, uint32_t bytes);
+static FLAC__uint64 unpack_uint64_(FLAC__byte *b, uint32_t bytes);
 
 static FLAC__bool read_metadata_block_header_(FLAC__Metadata_SimpleIterator *iterator);
 static FLAC__bool read_metadata_block_data_(FLAC__Metadata_SimpleIterator *iterator, FLAC__StreamMetadata *block);
-static FLAC__bool read_metadata_block_header_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__bool *is_last, FLAC__MetadataType *type, unsigned *length);
+static FLAC__bool read_metadata_block_header_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__bool *is_last, FLAC__MetadataType *type, uint32_t *length);
 static FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__IOCallback_Seek seek_cb, FLAC__StreamMetadata *block);
 static FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_streaminfo_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_StreamInfo *block);
-static FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_padding_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Seek seek_cb, FLAC__StreamMetadata_Padding *block, unsigned block_length);
-static FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_application_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_Application *block, unsigned block_length);
-static FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_seektable_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_SeekTable *block, unsigned block_length);
-static FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_vorbis_comment_entry_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_VorbisComment_Entry *entry, unsigned max_length);
-static FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_vorbis_comment_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__IOCallback_Seek seek_cb, FLAC__StreamMetadata_VorbisComment *block, unsigned block_length);
+static FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_padding_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Seek seek_cb, FLAC__StreamMetadata_Padding *block, uint32_t block_length);
+static FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_application_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_Application *block, uint32_t block_length);
+static FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_seektable_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_SeekTable *block, uint32_t block_length);
+static FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_vorbis_comment_entry_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_VorbisComment_Entry *entry, uint32_t max_length);
+static FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_vorbis_comment_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__IOCallback_Seek seek_cb, FLAC__StreamMetadata_VorbisComment *block, uint32_t block_length);
 static FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_cuesheet_track_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_CueSheet_Track *track);
 static FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_cuesheet_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_CueSheet *block);
 static FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_picture_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_Picture *block);
-static FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_unknown_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_Unknown *block, unsigned block_length);
+static FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_unknown_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_Unknown *block, uint32_t block_length);
 
 static FLAC__bool write_metadata_block_header_(FILE *file, FLAC__Metadata_SimpleIteratorStatus *status, const FLAC__StreamMetadata *block);
 static FLAC__bool write_metadata_block_data_(FILE *file, FLAC__Metadata_SimpleIteratorStatus *status, const FLAC__StreamMetadata *block);
 static FLAC__bool write_metadata_block_header_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata *block);
 static FLAC__bool write_metadata_block_data_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata *block);
 static FLAC__bool write_metadata_block_data_streaminfo_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_StreamInfo *block);
-static FLAC__bool write_metadata_block_data_padding_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_Padding *block, unsigned block_length);
-static FLAC__bool write_metadata_block_data_application_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_Application *block, unsigned block_length);
+static FLAC__bool write_metadata_block_data_padding_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_Padding *block, uint32_t block_length);
+static FLAC__bool write_metadata_block_data_application_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_Application *block, uint32_t block_length);
 static FLAC__bool write_metadata_block_data_seektable_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_SeekTable *block);
 static FLAC__bool write_metadata_block_data_vorbis_comment_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_VorbisComment *block);
 static FLAC__bool write_metadata_block_data_cuesheet_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_CueSheet *block);
 static FLAC__bool write_metadata_block_data_picture_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_Picture *block);
-static FLAC__bool write_metadata_block_data_unknown_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_Unknown *block, unsigned block_length);
+static FLAC__bool write_metadata_block_data_unknown_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_Unknown *block, uint32_t block_length);
 
 static FLAC__bool write_metadata_block_stationary_(FLAC__Metadata_SimpleIterator *iterator, const FLAC__StreamMetadata *block);
-static FLAC__bool write_metadata_block_stationary_with_padding_(FLAC__Metadata_SimpleIterator *iterator, FLAC__StreamMetadata *block, unsigned padding_length, FLAC__bool padding_is_last);
+static FLAC__bool write_metadata_block_stationary_with_padding_(FLAC__Metadata_SimpleIterator *iterator, FLAC__StreamMetadata *block, uint32_t padding_length, FLAC__bool padding_is_last);
 static FLAC__bool rewrite_whole_file_(FLAC__Metadata_SimpleIterator *iterator, FLAC__StreamMetadata *block, FLAC__bool append);
 
 static void simple_iterator_push_(FLAC__Metadata_SimpleIterator *iterator);
 static FLAC__bool simple_iterator_pop_(FLAC__Metadata_SimpleIterator *iterator);
 
-static unsigned seek_to_first_metadata_block_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__IOCallback_Seek seek_cb);
-static unsigned seek_to_first_metadata_block_(FILE *f);
+static uint32_t seek_to_first_metadata_block_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__IOCallback_Seek seek_cb);
+static uint32_t seek_to_first_metadata_block_(FILE *f);
 
 static FLAC__bool simple_iterator_copy_file_prefix_(FLAC__Metadata_SimpleIterator *iterator, FILE **tempfile, char **tempfilename, FLAC__bool append);
 static FLAC__bool simple_iterator_copy_file_postfix_(FLAC__Metadata_SimpleIterator *iterator, FILE **tempfile, char **tempfilename, int fixup_is_last_code, FLAC__off_t fixup_is_last_flag_offset, FLAC__bool backup);
@@ -265,7 +265,7 @@
 		cd->got_error = true;
 }
 
-FLAC_API FLAC__bool FLAC__metadata_get_picture(const char *filename, FLAC__StreamMetadata **picture, FLAC__StreamMetadata_Picture_Type type, const char *mime_type, const FLAC__byte *description, unsigned max_width, unsigned max_height, unsigned max_depth, unsigned max_colors)
+FLAC_API FLAC__bool FLAC__metadata_get_picture(const char *filename, FLAC__StreamMetadata **picture, FLAC__StreamMetadata_Picture_Type type, const char *mime_type, const FLAC__byte *description, uint32_t max_width, uint32_t max_height, uint32_t max_depth, uint32_t max_colors)
 {
 	FLAC__Metadata_SimpleIterator *it;
 	FLAC__uint64 max_area_seen = 0;
@@ -286,26 +286,29 @@
 	do {
 		if(FLAC__metadata_simple_iterator_get_block_type(it) == FLAC__METADATA_TYPE_PICTURE) {
 			FLAC__StreamMetadata *obj = FLAC__metadata_simple_iterator_get_block(it);
-			FLAC__uint64 area = (FLAC__uint64)obj->data.picture.width * (FLAC__uint64)obj->data.picture.height;
-			/* check constraints */
-			if(
-				(type == (FLAC__StreamMetadata_Picture_Type)(-1) || type == obj->data.picture.type) &&
-				(mime_type == 0 || !strcmp(mime_type, obj->data.picture.mime_type)) &&
-				(description == 0 || !strcmp((const char *)description, (const char *)obj->data.picture.description)) &&
-				obj->data.picture.width <= max_width &&
-				obj->data.picture.height <= max_height &&
-				obj->data.picture.depth <= max_depth &&
-				obj->data.picture.colors <= max_colors &&
-				(area > max_area_seen || (area == max_area_seen && obj->data.picture.depth > max_depth_seen))
-			) {
-				if(*picture)
-					FLAC__metadata_object_delete(*picture);
-				*picture = obj;
-				max_area_seen = area;
-				max_depth_seen = obj->data.picture.depth;
-			}
-			else {
-				FLAC__metadata_object_delete(obj);
+			if(0 != obj) {
+				FLAC__uint64 area = (FLAC__uint64)obj->data.picture.width * (FLAC__uint64)obj->data.picture.height;
+
+				/* check constraints */
+				if(
+					(type == (FLAC__StreamMetadata_Picture_Type)(-1) || type == obj->data.picture.type) &&
+					(mime_type == 0 || !strcmp(mime_type, obj->data.picture.mime_type)) &&
+					(description == 0 || !strcmp((const char *)description, (const char *)obj->data.picture.description)) &&
+					obj->data.picture.width <= max_width &&
+					obj->data.picture.height <= max_height &&
+					obj->data.picture.depth <= max_depth &&
+					obj->data.picture.colors <= max_colors &&
+					(area > max_area_seen || (area == max_area_seen && obj->data.picture.depth > max_depth_seen))
+				) {
+					if(*picture)
+						FLAC__metadata_object_delete(*picture);
+					*picture = obj;
+					max_area_seen = area;
+					max_depth_seen = obj->data.picture.depth;
+				}
+				else {
+					FLAC__metadata_object_delete(obj);
+				}
 			}
 		}
 	} while(FLAC__metadata_simple_iterator_next(it));
@@ -334,11 +337,11 @@
 	FLAC__Metadata_SimpleIteratorStatus status;
 	FLAC__off_t offset[SIMPLE_ITERATOR_MAX_PUSH_DEPTH];
 	FLAC__off_t first_offset; /* this is the offset to the STREAMINFO block */
-	unsigned depth;
+	uint32_t depth;
 	/* this is the metadata block header of the current block we are pointing to: */
 	FLAC__bool is_last;
 	FLAC__MetadataType type;
-	unsigned length;
+	uint32_t length;
 };
 
 FLAC_API const char * const FLAC__Metadata_SimpleIteratorStatusString[] = {
@@ -417,7 +420,7 @@
 
 static FLAC__bool simple_iterator_prime_input_(FLAC__Metadata_SimpleIterator *iterator, FLAC__bool read_only)
 {
-	unsigned ret;
+	uint32_t ret;
 
 	FLAC__ASSERT(0 != iterator);
 
@@ -443,7 +446,15 @@
 		case 0:
 			iterator->depth = 0;
 			iterator->first_offset = iterator->offset[iterator->depth] = ftello(iterator->file);
-			return read_metadata_block_header_(iterator);
+			ret = read_metadata_block_header_(iterator);
+			/* The first metadata block must be a streaminfo. If this is not the
+			 * case, the file is invalid and assumptions made elsewhere in the
+			 * code are invalid */
+			if(iterator->type != FLAC__METADATA_TYPE_STREAMINFO) {
+				iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_BAD_METADATA;
+				return false;
+			}
+			return ret;
 		case 1:
 			iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR;
 			return false;
@@ -576,7 +587,7 @@
 	FLAC__ASSERT(0 != iterator);
 	FLAC__ASSERT(0 != iterator->file);
 
-	return iterator->offset[iterator->depth];
+	return (off_t)iterator->offset[iterator->depth];
 }
 
 FLAC_API FLAC__MetadataType FLAC__metadata_simple_iterator_get_block_type(const FLAC__Metadata_SimpleIterator *iterator)
@@ -588,7 +599,7 @@
 }
 
 /*@@@@add to tests*/
-FLAC_API unsigned FLAC__metadata_simple_iterator_get_block_length(const FLAC__Metadata_SimpleIterator *iterator)
+FLAC_API uint32_t FLAC__metadata_simple_iterator_get_block_length(const FLAC__Metadata_SimpleIterator *iterator)
 {
 	FLAC__ASSERT(0 != iterator);
 	FLAC__ASSERT(0 != iterator->file);
@@ -599,7 +610,7 @@
 /*@@@@add to tests*/
 FLAC_API FLAC__bool FLAC__metadata_simple_iterator_get_application_id(FLAC__Metadata_SimpleIterator *iterator, FLAC__byte *id)
 {
-	const unsigned id_bytes = FLAC__STREAM_METADATA_APPLICATION_ID_LEN / 8;
+	const uint32_t id_bytes = FLAC__STREAM_METADATA_APPLICATION_ID_LEN / 8;
 
 	FLAC__ASSERT(0 != iterator);
 	FLAC__ASSERT(0 != iterator->file);
@@ -693,7 +704,7 @@
 		}
 	}
 	else /* iterator->length < block->length */ {
-		unsigned padding_leftover = 0;
+		uint32_t padding_leftover = 0;
 		FLAC__bool padding_is_last = false;
 		if(use_padding) {
 			/* first see if we can even use padding */
@@ -701,7 +712,7 @@
 				use_padding = false;
 			}
 			else {
-				const unsigned extra_padding_bytes_required = block->length - iterator->length;
+				const uint32_t extra_padding_bytes_required = block->length - iterator->length;
 				simple_iterator_push_(iterator);
 				if(!FLAC__metadata_simple_iterator_next(iterator)) {
 					(void)simple_iterator_pop_(iterator);
@@ -753,7 +764,7 @@
 
 FLAC_API FLAC__bool FLAC__metadata_simple_iterator_insert_block_after(FLAC__Metadata_SimpleIterator *iterator, FLAC__StreamMetadata *block, FLAC__bool use_padding)
 {
-	unsigned padding_leftover = 0;
+	uint32_t padding_leftover = 0;
 	FLAC__bool padding_is_last = false;
 
 	FLAC__ASSERT_DECLARATION(FLAC__off_t debug_target_offset = iterator->offset[iterator->depth] + FLAC__STREAM_METADATA_HEADER_LENGTH + iterator->length;)
@@ -837,6 +848,11 @@
 	FLAC__ASSERT_DECLARATION(FLAC__off_t debug_target_offset = iterator->offset[iterator->depth];)
 	FLAC__bool ret;
 
+	if(!iterator->is_writable) {
+		iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_NOT_WRITABLE;
+		return false;
+	}
+
 	if(iterator->type == FLAC__METADATA_TYPE_STREAMINFO) {
 		iterator->status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_ILLEGAL_INPUT;
 		return false;
@@ -887,7 +903,7 @@
 	FLAC__bool is_ogg;
 	FLAC__Metadata_Node *head;
 	FLAC__Metadata_Node *tail;
-	unsigned nodes;
+	uint32_t nodes;
 	FLAC__Metadata_ChainStatus status;
 	FLAC__off_t first_offset, last_offset;
 	/*
@@ -1084,8 +1100,8 @@
 static FLAC__bool chain_merge_adjacent_padding_(FLAC__Metadata_Chain *chain, FLAC__Metadata_Node *node)
 {
 	if(node->data->type == FLAC__METADATA_TYPE_PADDING && 0 != node->next && node->next->data->type == FLAC__METADATA_TYPE_PADDING) {
-		const unsigned growth = FLAC__STREAM_METADATA_HEADER_LENGTH + node->next->data->length;
-		node->data->length += growth;
+		const uint32_t growth = FLAC__STREAM_METADATA_HEADER_LENGTH + node->next->data->length;
+		node->data->length += growth; /* new block size can be greater than max metadata block size, but it'll be fixed later in chain_prepare_for_write_() */
 
 		chain_delete_node_(chain, node->next);
 		return true;
@@ -1094,6 +1110,11 @@
 		return false;
 }
 
+#if defined(_MSC_VER)
+// silence three MSVC warnings 'conversion from 'const __int64' to 'uint32_t', possible loss of data'
+#pragma warning ( disable : 4244 )
+#endif
+
 /* Returns the new length of the chain, or 0 if there was an error. */
 /* WATCHOUT: This can get called multiple times before a write, so
  * it should still work when this happens.
@@ -1152,9 +1173,29 @@
 		}
 	}
 
+	/* check sizes of all metadata blocks; reduce padding size if necessary */
+	{
+		FLAC__Metadata_Node *node;
+		for (node = chain->head; node; node = node->next) {
+			if(node->data->length >= (1u << FLAC__STREAM_METADATA_LENGTH_LEN)) {
+				if(node->data->type == FLAC__METADATA_TYPE_PADDING) {
+					node->data->length = (1u << FLAC__STREAM_METADATA_LENGTH_LEN) - 1;
+					current_length = chain_calculate_length_(chain);
+				} else {
+					chain->status = FLAC__METADATA_CHAIN_STATUS_BAD_METADATA;
+					return 0;
+				}
+			}
+		}
+	}
+
 	return current_length;
 }
 
+#if defined(_MSC_VER)
+#pragma warning ( default : 4244 )
+#endif
+
 static FLAC__bool chain_read_cb_(FLAC__Metadata_Chain *chain, FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__IOCallback_Seek seek_cb, FLAC__IOCallback_Tell tell_cb)
 {
 	FLAC__Metadata_Node *node;
@@ -1192,7 +1233,7 @@
 	{
 		FLAC__bool is_last;
 		FLAC__MetadataType type;
-		unsigned length;
+		uint32_t length;
 
 		do {
 			node = node_new_();
@@ -1327,6 +1368,12 @@
 
 	chain->initial_length = chain_calculate_length_(chain);
 
+	if(chain->initial_length == 0) {
+		/* Ogg FLAC file must have at least streaminfo and vorbis comment */
+		chain->status = FLAC__METADATA_CHAIN_STATUS_BAD_METADATA;
+		return false;
+	}
+
 	return true;
 }
 
@@ -1597,40 +1644,99 @@
 	return chain_read_with_callbacks_(chain, handle, callbacks, /*is_ogg=*/true);
 }
 
+typedef enum {
+	LBS_NONE = 0,
+	LBS_SIZE_CHANGED,
+	LBS_BLOCK_ADDED,
+	LBS_BLOCK_REMOVED
+} LastBlockState;
+
+#if defined(_MSC_VER)
+// silence three MSVC warnings 'conversion from 'const __int64' to 'uint32_t', possible loss of data'
+#pragma warning ( disable : 4244 )
+#endif
+
 FLAC_API FLAC__bool FLAC__metadata_chain_check_if_tempfile_needed(FLAC__Metadata_Chain *chain, FLAC__bool use_padding)
 {
 	/* This does all the same checks that are in chain_prepare_for_write_()
 	 * but doesn't actually alter the chain.  Make sure to update the logic
 	 * here if chain_prepare_for_write_() changes.
 	 */
-	const FLAC__off_t current_length = chain_calculate_length_(chain);
+	FLAC__off_t current_length;
+	LastBlockState lbs_state = LBS_NONE;
+	uint32_t lbs_size = 0;
 
 	FLAC__ASSERT(0 != chain);
 
+	current_length = chain_calculate_length_(chain);
+
 	if(use_padding) {
+		const FLAC__Metadata_Node * const node = chain->tail;
 		/* if the metadata shrank and the last block is padding, we just extend the last padding block */
-		if(current_length < chain->initial_length && chain->tail->data->type == FLAC__METADATA_TYPE_PADDING)
-			return false;
+		if(current_length < chain->initial_length && node->data->type == FLAC__METADATA_TYPE_PADDING) {
+			lbs_state = LBS_SIZE_CHANGED;
+			lbs_size = node->data->length + (chain->initial_length - current_length);
+		}
 		/* if the metadata shrank more than 4 bytes then there's room to add another padding block */
-		else if(current_length + (FLAC__off_t)FLAC__STREAM_METADATA_HEADER_LENGTH <= chain->initial_length)
-			return false;
+		else if(current_length + (FLAC__off_t)FLAC__STREAM_METADATA_HEADER_LENGTH <= chain->initial_length) {
+			lbs_state = LBS_BLOCK_ADDED;
+			lbs_size = chain->initial_length - (current_length + (FLAC__off_t)FLAC__STREAM_METADATA_HEADER_LENGTH);
+		}
 		/* if the metadata grew but the last block is padding, try cutting the padding to restore the original length so we don't have to rewrite the whole file */
 		else if(current_length > chain->initial_length) {
 			const FLAC__off_t delta = current_length - chain->initial_length;
-			if(chain->tail->data->type == FLAC__METADATA_TYPE_PADDING) {
+			if(node->data->type == FLAC__METADATA_TYPE_PADDING) {
 				/* if the delta is exactly the size of the last padding block, remove the padding block */
-				if((FLAC__off_t)chain->tail->data->length + (FLAC__off_t)FLAC__STREAM_METADATA_HEADER_LENGTH == delta)
-					return false;
+				if((FLAC__off_t)node->data->length + (FLAC__off_t)FLAC__STREAM_METADATA_HEADER_LENGTH == delta) {
+					lbs_state = LBS_BLOCK_REMOVED;
+					lbs_size = 0;
+				}
 				/* if there is at least 'delta' bytes of padding, trim the padding down */
-				else if((FLAC__off_t)chain->tail->data->length >= delta)
-					return false;
+				else if((FLAC__off_t)node->data->length >= delta) {
+					lbs_state = LBS_SIZE_CHANGED;
+					lbs_size = node->data->length - delta;
+				}
 			}
 		}
 	}
 
+	current_length = 0;
+	/* check sizes of all metadata blocks; reduce padding size if necessary */
+	{
+		const FLAC__Metadata_Node *node;
+		for(node = chain->head; node; node = node->next) {
+			uint32_t block_len = node->data->length;
+			if(node == chain->tail) {
+				if(lbs_state == LBS_BLOCK_REMOVED)
+					continue;
+				else if(lbs_state == LBS_SIZE_CHANGED)
+					block_len = lbs_size;
+			}
+			if(block_len >= (1u << FLAC__STREAM_METADATA_LENGTH_LEN)) {
+				if(node->data->type == FLAC__METADATA_TYPE_PADDING)
+					block_len = (1u << FLAC__STREAM_METADATA_LENGTH_LEN) - 1;
+				else
+					return false /* the return value doesn't matter */;
+			}
+			current_length += (FLAC__STREAM_METADATA_HEADER_LENGTH + block_len);
+		}
+
+		if(lbs_state == LBS_BLOCK_ADDED) {
+			/* test added padding block */
+			uint32_t block_len = lbs_size;
+			if(block_len >= (1u << FLAC__STREAM_METADATA_LENGTH_LEN))
+				block_len = (1u << FLAC__STREAM_METADATA_LENGTH_LEN) - 1;
+			current_length += (FLAC__STREAM_METADATA_HEADER_LENGTH + block_len);
+		}
+	}
+
 	return (current_length != chain->initial_length);
 }
 
+#if defined(_MSC_VER)
+#pragma warning ( default : 4244 )
+#endif
+
 FLAC_API FLAC__bool FLAC__metadata_chain_write(FLAC__Metadata_Chain *chain, FLAC__bool use_padding, FLAC__bool preserve_file_stats)
 {
 	struct flac_stat_s stats;
@@ -1793,7 +1899,7 @@
 FLAC_API void FLAC__metadata_chain_sort_padding(FLAC__Metadata_Chain *chain)
 {
 	FLAC__Metadata_Node *node, *save;
-	unsigned i;
+	uint32_t i;
 
 	FLAC__ASSERT(0 != chain);
 
@@ -1970,9 +2076,9 @@
  *
  ***************************************************************************/
 
-void pack_uint32_(FLAC__uint32 val, FLAC__byte *b, unsigned bytes)
+void pack_uint32_(FLAC__uint32 val, FLAC__byte *b, uint32_t bytes)
 {
-	unsigned i;
+	uint32_t i;
 
 	b += bytes;
 
@@ -1982,9 +2088,9 @@
 	}
 }
 
-void pack_uint32_little_endian_(FLAC__uint32 val, FLAC__byte *b, unsigned bytes)
+void pack_uint32_little_endian_(FLAC__uint32 val, FLAC__byte *b, uint32_t bytes)
 {
-	unsigned i;
+	uint32_t i;
 
 	for(i = 0; i < bytes; i++) {
 		*(b++) = (FLAC__byte)(val & 0xff);
@@ -1992,9 +2098,9 @@
 	}
 }
 
-void pack_uint64_(FLAC__uint64 val, FLAC__byte *b, unsigned bytes)
+void pack_uint64_(FLAC__uint64 val, FLAC__byte *b, uint32_t bytes)
 {
-	unsigned i;
+	uint32_t i;
 
 	b += bytes;
 
@@ -2004,10 +2110,10 @@
 	}
 }
 
-FLAC__uint32 unpack_uint32_(FLAC__byte *b, unsigned bytes)
+FLAC__uint32 unpack_uint32_(FLAC__byte *b, uint32_t bytes)
 {
 	FLAC__uint32 ret = 0;
-	unsigned i;
+	uint32_t i;
 
 	for(i = 0; i < bytes; i++)
 		ret = (ret << 8) | (FLAC__uint32)(*b++);
@@ -2015,10 +2121,10 @@
 	return ret;
 }
 
-FLAC__uint32 unpack_uint32_little_endian_(FLAC__byte *b, unsigned bytes)
+FLAC__uint32 unpack_uint32_little_endian_(FLAC__byte *b, uint32_t bytes)
 {
 	FLAC__uint32 ret = 0;
-	unsigned i;
+	uint32_t i;
 
 	b += bytes;
 
@@ -2028,10 +2134,10 @@
 	return ret;
 }
 
-FLAC__uint64 unpack_uint64_(FLAC__byte *b, unsigned bytes)
+FLAC__uint64 unpack_uint64_(FLAC__byte *b, uint32_t bytes)
 {
 	FLAC__uint64 ret = 0;
-	unsigned i;
+	uint32_t i;
 
 	for(i = 0; i < bytes; i++)
 		ret = (ret << 8) | (FLAC__uint64)(*b++);
@@ -2062,7 +2168,7 @@
 	return (iterator->status == FLAC__METADATA_SIMPLE_ITERATOR_STATUS_OK);
 }
 
-FLAC__bool read_metadata_block_header_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__bool *is_last, FLAC__MetadataType *type, unsigned *length)
+FLAC__bool read_metadata_block_header_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__bool *is_last, FLAC__MetadataType *type, uint32_t *length)
 {
 	FLAC__byte raw_header[FLAC__STREAM_METADATA_HEADER_LENGTH];
 
@@ -2120,16 +2226,16 @@
 	block->max_blocksize = unpack_uint32_(b, 2); b += 2;
 	block->min_framesize = unpack_uint32_(b, 3); b += 3;
 	block->max_framesize = unpack_uint32_(b, 3); b += 3;
-	block->sample_rate = (unpack_uint32_(b, 2) << 4) | ((unsigned)(b[2] & 0xf0) >> 4);
-	block->channels = (unsigned)((b[2] & 0x0e) >> 1) + 1;
-	block->bits_per_sample = ((((unsigned)(b[2] & 0x01)) << 4) | (((unsigned)(b[3] & 0xf0)) >> 4)) + 1;
+	block->sample_rate = (unpack_uint32_(b, 2) << 4) | ((uint32_t)(b[2] & 0xf0) >> 4);
+	block->channels = (uint32_t)((b[2] & 0x0e) >> 1) + 1;
+	block->bits_per_sample = ((((uint32_t)(b[2] & 0x01)) << 4) | (((uint32_t)(b[3] & 0xf0)) >> 4)) + 1;
 	block->total_samples = (((FLAC__uint64)(b[3] & 0x0f)) << 32) | unpack_uint64_(b+4, 4);
 	memcpy(block->md5sum, b+8, 16);
 
 	return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_OK;
 }
 
-FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_padding_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Seek seek_cb, FLAC__StreamMetadata_Padding *block, unsigned block_length)
+FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_padding_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Seek seek_cb, FLAC__StreamMetadata_Padding *block, uint32_t block_length)
 {
 	(void)block; /* nothing to do; we don't care about reading the padding bytes */
 
@@ -2139,9 +2245,9 @@
 	return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_OK;
 }
 
-FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_application_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_Application *block, unsigned block_length)
+FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_application_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_Application *block, uint32_t block_length)
 {
-	const unsigned id_bytes = FLAC__STREAM_METADATA_APPLICATION_ID_LEN / 8;
+	const uint32_t id_bytes = FLAC__STREAM_METADATA_APPLICATION_ID_LEN / 8;
 
 	if(read_cb(block->id, 1, id_bytes, handle) != id_bytes)
 		return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR;
@@ -2165,9 +2271,9 @@
 	return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_OK;
 }
 
-FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_seektable_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_SeekTable *block, unsigned block_length)
+FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_seektable_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_SeekTable *block, uint32_t block_length)
 {
-	unsigned i;
+	uint32_t i;
 	FLAC__byte buffer[FLAC__STREAM_METADATA_SEEKPOINT_LENGTH];
 
 	FLAC__ASSERT(block_length % FLAC__STREAM_METADATA_SEEKPOINT_LENGTH == 0);
@@ -2191,9 +2297,9 @@
 	return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_OK;
 }
 
-FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_vorbis_comment_entry_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_VorbisComment_Entry *entry, unsigned max_length)
+FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_vorbis_comment_entry_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_VorbisComment_Entry *entry, uint32_t max_length)
 {
-	const unsigned entry_length_len = FLAC__STREAM_METADATA_VORBIS_COMMENT_ENTRY_LENGTH_LEN / 8;
+	const uint32_t entry_length_len = FLAC__STREAM_METADATA_VORBIS_COMMENT_ENTRY_LENGTH_LEN / 8;
 	FLAC__byte buffer[4]; /* magic number is asserted below */
 
 	FLAC__ASSERT(FLAC__STREAM_METADATA_VORBIS_COMMENT_ENTRY_LENGTH_LEN / 8 == sizeof(buffer));
@@ -2213,27 +2319,22 @@
 	if(0 != entry->entry)
 		free(entry->entry);
 
-	if(entry->length == 0) {
-		entry->entry = 0;
-	}
-	else {
-		if(0 == (entry->entry = safe_malloc_add_2op_(entry->length, /*+*/1)))
-			return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_MEMORY_ALLOCATION_ERROR;
+	if(0 == (entry->entry = safe_malloc_add_2op_(entry->length, /*+*/1)))
+		return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_MEMORY_ALLOCATION_ERROR;
 
-		if(read_cb(entry->entry, 1, entry->length, handle) != entry->length)
-			return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR;
+	if(entry->length > 0 && read_cb(entry->entry, 1, entry->length, handle) != entry->length)
+		return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR;
 
-		entry->entry[entry->length] = '\0';
-	}
+	entry->entry[entry->length] = '\0';
 
 	return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_OK;
 }
 
-FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_vorbis_comment_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__IOCallback_Seek seek_cb, FLAC__StreamMetadata_VorbisComment *block, unsigned block_length)
+FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_vorbis_comment_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__IOCallback_Seek seek_cb, FLAC__StreamMetadata_VorbisComment *block, uint32_t block_length)
 {
-	unsigned i;
+	uint32_t i;
 	FLAC__Metadata_SimpleIteratorStatus status;
-	const unsigned num_comments_len = FLAC__STREAM_METADATA_VORBIS_COMMENT_NUM_COMMENTS_LEN / 8;
+	const uint32_t num_comments_len = FLAC__STREAM_METADATA_VORBIS_COMMENT_NUM_COMMENTS_LEN / 8;
 	FLAC__byte buffer[4]; /* magic number is asserted below */
 
 	FLAC__ASSERT(FLAC__STREAM_METADATA_VORBIS_COMMENT_NUM_COMMENTS_LEN / 8 == sizeof(buffer));
@@ -2255,8 +2356,15 @@
 	if(block->num_comments == 0) {
 		block->comments = 0;
 	}
-	else if(0 == (block->comments = calloc(block->num_comments, sizeof(FLAC__StreamMetadata_VorbisComment_Entry))))
+	else if(block->num_comments > (block_length >> 2)) { /* each comment needs at least 4 bytes */
+		block->num_comments = 0;
+		status = FLAC__METADATA_SIMPLE_ITERATOR_STATUS_BAD_METADATA;
+		goto skip;
+	}
+	else if(0 == (block->comments = calloc(block->num_comments, sizeof(FLAC__StreamMetadata_VorbisComment_Entry)))) {
+		block->num_comments = 0;
 		return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_MEMORY_ALLOCATION_ERROR;
+	}
 
 	for(i = 0; i < block->num_comments; i++) {
 		status = read_metadata_block_data_vorbis_comment_entry_cb_(handle, read_cb, block->comments + i, block_length);
@@ -2281,7 +2389,7 @@
 
 FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_cuesheet_track_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_CueSheet_Track *track)
 {
-	unsigned i, len;
+	uint32_t i, len;
 	FLAC__byte buffer[32]; /* asserted below that this is big enough */
 
 	FLAC__ASSERT(sizeof(buffer) >= sizeof(FLAC__uint64));
@@ -2350,7 +2458,7 @@
 
 FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_cuesheet_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_CueSheet *block)
 {
-	unsigned i, len;
+	uint32_t i, len;
 	FLAC__Metadata_SimpleIteratorStatus status;
 	FLAC__byte buffer[1024]; /* MSVC needs a constant expression so we put a magic number and assert */
 
@@ -2409,6 +2517,9 @@
 		return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_READ_ERROR;
 	*length = unpack_uint32_(buffer, length_len);
 
+	if(*length > (1u << FLAC__STREAM_METADATA_LENGTH_LEN)) /* data cannot be larger than FLAC metadata block */
+		return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_BAD_METADATA;
+
 	if(0 != *data)
 		free(*data);
 
@@ -2480,7 +2591,7 @@
 	return FLAC__METADATA_SIMPLE_ITERATOR_STATUS_OK;
 }
 
-FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_unknown_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_Unknown *block, unsigned block_length)
+FLAC__Metadata_SimpleIteratorStatus read_metadata_block_data_unknown_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__StreamMetadata_Unknown *block, uint32_t block_length)
 {
 	if(block_length == 0) {
 		block->data = 0;
@@ -2529,6 +2640,9 @@
 	FLAC__byte buffer[FLAC__STREAM_METADATA_HEADER_LENGTH];
 
 	FLAC__ASSERT(block->length < (1u << FLAC__STREAM_METADATA_LENGTH_LEN));
+	/* double protection */
+	if(block->length >= (1u << FLAC__STREAM_METADATA_LENGTH_LEN))
+		return false;
 
 	buffer[0] = (block->is_last? 0x80 : 0) | (FLAC__byte)block->type;
 	pack_uint32_(block->length, buffer + 1, 3);
@@ -2566,8 +2680,8 @@
 FLAC__bool write_metadata_block_data_streaminfo_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_StreamInfo *block)
 {
 	FLAC__byte buffer[FLAC__STREAM_METADATA_STREAMINFO_LENGTH];
-	const unsigned channels1 = block->channels - 1;
-	const unsigned bps1 = block->bits_per_sample - 1;
+	const uint32_t channels1 = block->channels - 1;
+	const uint32_t bps1 = block->bits_per_sample - 1;
 
 	/* we are using hardcoded numbers for simplicity but we should
 	 * probably eventually write a bit-level packer and use the
@@ -2590,9 +2704,9 @@
 	return true;
 }
 
-FLAC__bool write_metadata_block_data_padding_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_Padding *block, unsigned block_length)
+FLAC__bool write_metadata_block_data_padding_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_Padding *block, uint32_t block_length)
 {
-	unsigned i, n = block_length;
+	uint32_t i, n = block_length;
 	FLAC__byte buffer[1024];
 
 	(void)block;
@@ -2611,9 +2725,9 @@
 	return true;
 }
 
-FLAC__bool write_metadata_block_data_application_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_Application *block, unsigned block_length)
+FLAC__bool write_metadata_block_data_application_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_Application *block, uint32_t block_length)
 {
-	const unsigned id_bytes = FLAC__STREAM_METADATA_APPLICATION_ID_LEN / 8;
+	const uint32_t id_bytes = FLAC__STREAM_METADATA_APPLICATION_ID_LEN / 8;
 
 	if(write_cb(block->id, 1, id_bytes, handle) != id_bytes)
 		return false;
@@ -2628,7 +2742,7 @@
 
 FLAC__bool write_metadata_block_data_seektable_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_SeekTable *block)
 {
-	unsigned i;
+	uint32_t i;
 	FLAC__byte buffer[FLAC__STREAM_METADATA_SEEKPOINT_LENGTH];
 
 	for(i = 0; i < block->num_points; i++) {
@@ -2645,9 +2759,9 @@
 
 FLAC__bool write_metadata_block_data_vorbis_comment_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_VorbisComment *block)
 {
-	unsigned i;
-	const unsigned entry_length_len = FLAC__STREAM_METADATA_VORBIS_COMMENT_ENTRY_LENGTH_LEN / 8;
-	const unsigned num_comments_len = FLAC__STREAM_METADATA_VORBIS_COMMENT_NUM_COMMENTS_LEN / 8;
+	uint32_t i;
+	const uint32_t entry_length_len = FLAC__STREAM_METADATA_VORBIS_COMMENT_ENTRY_LENGTH_LEN / 8;
+	const uint32_t num_comments_len = FLAC__STREAM_METADATA_VORBIS_COMMENT_NUM_COMMENTS_LEN / 8;
 	FLAC__byte buffer[4]; /* magic number is asserted below */
 
 	FLAC__ASSERT(flac_max(FLAC__STREAM_METADATA_VORBIS_COMMENT_ENTRY_LENGTH_LEN, FLAC__STREAM_METADATA_VORBIS_COMMENT_NUM_COMMENTS_LEN) / 8 == sizeof(buffer));
@@ -2675,7 +2789,7 @@
 
 FLAC__bool write_metadata_block_data_cuesheet_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_CueSheet *block)
 {
-	unsigned i, j, len;
+	uint32_t i, j, len;
 	FLAC__byte buffer[1024]; /* asserted below that this is big enough */
 
 	FLAC__ASSERT(sizeof(buffer) >= sizeof(FLAC__uint64));
@@ -2769,7 +2883,7 @@
 
 FLAC__bool write_metadata_block_data_picture_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_Picture *block)
 {
-	unsigned len;
+	uint32_t len;
 	size_t slen;
 	FLAC__byte buffer[4]; /* magic number is asserted below */
 
@@ -2841,7 +2955,7 @@
 	return true;
 }
 
-FLAC__bool write_metadata_block_data_unknown_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_Unknown *block, unsigned block_length)
+FLAC__bool write_metadata_block_data_unknown_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Write write_cb, const FLAC__StreamMetadata_Unknown *block, uint32_t block_length)
 {
 	if(write_cb(block->data, 1, block_length, handle) != block_length)
 		return false;
@@ -2870,7 +2984,7 @@
 	return read_metadata_block_header_(iterator);
 }
 
-FLAC__bool write_metadata_block_stationary_with_padding_(FLAC__Metadata_SimpleIterator *iterator, FLAC__StreamMetadata *block, unsigned padding_length, FLAC__bool padding_is_last)
+FLAC__bool write_metadata_block_stationary_with_padding_(FLAC__Metadata_SimpleIterator *iterator, FLAC__StreamMetadata *block, uint32_t padding_length, FLAC__bool padding_is_last)
 {
 	FLAC__StreamMetadata *padding;
 
@@ -2989,11 +3103,11 @@
  * 2: seek error
  * 3: not a FLAC file
  */
-unsigned seek_to_first_metadata_block_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__IOCallback_Seek seek_cb)
+uint32_t seek_to_first_metadata_block_cb_(FLAC__IOHandle handle, FLAC__IOCallback_Read read_cb, FLAC__IOCallback_Seek seek_cb)
 {
 	FLAC__byte buffer[4];
 	size_t n;
-	unsigned i;
+	uint32_t i;
 
 	FLAC__ASSERT(FLAC__STREAM_SYNC_LENGTH == sizeof(buffer));
 
@@ -3005,7 +3119,7 @@
 	else if(n != 4)
 		return 3;
 	else if(0 == memcmp(buffer, "ID3", 3)) {
-		unsigned tag_length = 0;
+		uint32_t tag_length = 0;
 
 		/* skip to the tag length */
 		if(seek_cb(handle, 2, SEEK_CUR) < 0)
@@ -3039,7 +3153,7 @@
 		return 3;
 }
 
-unsigned seek_to_first_metadata_block_(FILE *f)
+uint32_t seek_to_first_metadata_block_(FILE *f)
 {
 	return seek_to_first_metadata_block_cb_((FLAC__IOHandle)f, (FLAC__IOCallback_Read)fread, fseek_wrapper_);
 }
@@ -3233,11 +3347,14 @@
 	va_list va;
 	int rc;
 
-	va_start (va, fmt);
-
 #if defined _MSC_VER
 	if (size == 0)
 		return 1024;
+#endif
+
+	va_start (va, fmt);
+
+#if defined _MSC_VER
 	rc = vsnprintf_s (str, size, _TRUNCATE, fmt, va);
 	if (rc < 0)
 		rc = size - 1;
@@ -3343,13 +3460,19 @@
 
 void set_file_stats_(const char *filename, struct flac_stat_s *stats)
 {
+#if defined(_POSIX_C_SOURCE) && (_POSIX_C_SOURCE >= 200809L) && !defined(_WIN32)
+	struct timespec srctime[2] = {};
+	srctime[0].tv_sec = stats->st_atime;
+	srctime[1].tv_sec = stats->st_mtime;
+#else
 	struct utimbuf srctime;
+	srctime.actime = stats->st_atime;
+	srctime.modtime = stats->st_mtime;
+#endif
 
 	FLAC__ASSERT(0 != filename);
 	FLAC__ASSERT(0 != stats);
 
-	srctime.actime = stats->st_atime;
-	srctime.modtime = stats->st_mtime;
 	(void)flac_chmod(filename, stats->st_mode);
 	(void)flac_utime(filename, &srctime);
 #if !defined _MSC_VER && !defined __BORLANDC__ && !defined __MINGW32__

diff --git a/src/libFLAC/metadata_object.c b/src/libFLAC/metadata_object.c
index 0456297..3f56b0e 100644
--- a/src/libFLAC/metadata_object.c
+++ b/src/libFLAC/metadata_object.c

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2001-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -62,19 +62,17 @@
  *  else ASSERT
  * malloc error leaves 'to' unchanged
  */
-static FLAC__bool copy_bytes_(FLAC__byte **to, const FLAC__byte *from, unsigned bytes)
+static FLAC__bool copy_bytes_(FLAC__byte **to, const FLAC__byte *from, uint32_t bytes)
 {
-	FLAC__ASSERT(0 != to);
-	if(bytes > 0 && 0 != from) {
+	FLAC__ASSERT(to != NULL);
+	if (bytes > 0 && from != NULL) {
 		FLAC__byte *x;
-		if(0 == (x = safe_malloc_(bytes)))
+		if ((x = safe_malloc_(bytes)) == NULL)
 			return false;
 		memcpy(x, from, bytes);
 		*to = x;
 	}
 	else {
-		FLAC__ASSERT(0 == from);
-		FLAC__ASSERT(bytes == 0);
 		*to = 0;
 	}
 	return true;
@@ -82,13 +80,12 @@
 
 #if 0 /* UNUSED */
 /* like copy_bytes_(), but free()s the original '*to' if the copy succeeds and the original '*to' is non-NULL */
-static FLAC__bool free_copy_bytes_(FLAC__byte **to, const FLAC__byte *from, unsigned bytes)
+static FLAC__bool free_copy_bytes_(FLAC__byte **to, const FLAC__byte *from, uint32_t bytes)
 {
 	FLAC__byte *copy;
-	FLAC__ASSERT(0 != to);
-	if(copy_bytes_(&copy, from, bytes)) {
-		if(*to)
-			free(*to);
+	FLAC__ASSERT(to != NULL);
+	if (copy_bytes_(&copy, from, bytes)) {
+		free(*to);
 		*to = copy;
 		return true;
 	}
@@ -99,10 +96,10 @@
 
 /* reallocate entry to 1 byte larger and add a terminating NUL */
 /* realloc() failure leaves entry unchanged */
-static FLAC__bool ensure_null_terminated_(FLAC__byte **entry, unsigned length)
+static FLAC__bool ensure_null_terminated_(FLAC__byte **entry, uint32_t length)
 {
-	FLAC__byte *x = safe_realloc_add_2op_(*entry, length, /*+*/1);
-	if(0 != x) {
+	FLAC__byte *x = safe_realloc_nofree_add_2op_(*entry, length, /*+*/1);
+	if (x != NULL) {
 		x[length] = '\0';
 		*entry = x;
 		return true;
@@ -118,10 +115,9 @@
 static FLAC__bool copy_cstring_(char **to, const char *from)
 {
 	char *copy = strdup(from);
-	FLAC__ASSERT(to);
-	if(copy) {
-		if(*to)
-			free(*to);
+	FLAC__ASSERT(to != NULL);
+	if (copy) {
+		free(*to);
 		*to = copy;
 		return true;
 	}
@@ -132,14 +128,16 @@
 static FLAC__bool copy_vcentry_(FLAC__StreamMetadata_VorbisComment_Entry *to, const FLAC__StreamMetadata_VorbisComment_Entry *from)
 {
 	to->length = from->length;
-	if(0 == from->entry) {
+	if (from->entry == 0) {
 		FLAC__ASSERT(from->length == 0);
-		to->entry = 0;
+		if ((to->entry = safe_malloc_(1)) == NULL)
+			return false;
+		to->entry[0] = '\0';
 	}
 	else {
 		FLAC__byte *x;
 		FLAC__ASSERT(from->length > 0);
-		if(0 == (x = safe_malloc_add_2op_(from->length, /*+*/1)))
+		if ((x = safe_malloc_add_2op_(from->length, /*+*/1)) == NULL)
 			return false;
 		memcpy(x, from->entry, from->length);
 		x[from->length] = '\0';
@@ -151,13 +149,13 @@
 static FLAC__bool copy_track_(FLAC__StreamMetadata_CueSheet_Track *to, const FLAC__StreamMetadata_CueSheet_Track *from)
 {
 	memcpy(to, from, sizeof(FLAC__StreamMetadata_CueSheet_Track));
-	if(0 == from->indices) {
+	if (from->indices == 0) {
 		FLAC__ASSERT(from->num_indices == 0);
 	}
 	else {
 		FLAC__StreamMetadata_CueSheet_Index *x;
 		FLAC__ASSERT(from->num_indices > 0);
-		if(0 == (x = safe_malloc_mul_2op_p(from->num_indices, /*times*/sizeof(FLAC__StreamMetadata_CueSheet_Index))))
+		if ((x = safe_malloc_mul_2op_p(from->num_indices, /*times*/sizeof(FLAC__StreamMetadata_CueSheet_Index))) == NULL)
 			return false;
 		memcpy(x, from->indices, from->num_indices * sizeof(FLAC__StreamMetadata_CueSheet_Index));
 		to->indices = x;
@@ -167,13 +165,13 @@
 
 static void seektable_calculate_length_(FLAC__StreamMetadata *object)
 {
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_SEEKTABLE);
 
 	object->length = object->data.seek_table.num_points * FLAC__STREAM_METADATA_SEEKPOINT_LENGTH;
 }
 
-static FLAC__StreamMetadata_SeekPoint *seekpoint_array_new_(unsigned num_points)
+static FLAC__StreamMetadata_SeekPoint *seekpoint_array_new_(uint32_t num_points)
 {
 	FLAC__StreamMetadata_SeekPoint *object_array;
 
@@ -181,9 +179,9 @@
 
 	object_array = safe_malloc_mul_2op_p(num_points, /*times*/sizeof(FLAC__StreamMetadata_SeekPoint));
 
-	if(0 != object_array) {
-		unsigned i;
-		for(i = 0; i < num_points; i++) {
+	if (object_array != NULL) {
+		uint32_t i;
+		for (i = 0; i < num_points; i++) {
 			object_array[i].sample_number = FLAC__STREAM_METADATA_SEEKPOINT_PLACEHOLDER;
 			object_array[i].stream_offset = 0;
 			object_array[i].frame_samples = 0;
@@ -195,54 +193,52 @@
 
 static void vorbiscomment_calculate_length_(FLAC__StreamMetadata *object)
 {
-	unsigned i;
+	uint32_t i;
 
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_VORBIS_COMMENT);
 
 	object->length = (FLAC__STREAM_METADATA_VORBIS_COMMENT_ENTRY_LENGTH_LEN) / 8;
 	object->length += object->data.vorbis_comment.vendor_string.length;
 	object->length += (FLAC__STREAM_METADATA_VORBIS_COMMENT_NUM_COMMENTS_LEN) / 8;
-	for(i = 0; i < object->data.vorbis_comment.num_comments; i++) {
+	for (i = 0; i < object->data.vorbis_comment.num_comments; i++) {
 		object->length += (FLAC__STREAM_METADATA_VORBIS_COMMENT_ENTRY_LENGTH_LEN / 8);
 		object->length += object->data.vorbis_comment.comments[i].length;
 	}
 }
 
-static FLAC__StreamMetadata_VorbisComment_Entry *vorbiscomment_entry_array_new_(unsigned num_comments)
+static FLAC__StreamMetadata_VorbisComment_Entry *vorbiscomment_entry_array_new_(uint32_t num_comments)
 {
 	FLAC__ASSERT(num_comments > 0);
 
 	return safe_calloc_(num_comments, sizeof(FLAC__StreamMetadata_VorbisComment_Entry));
 }
 
-static void vorbiscomment_entry_array_delete_(FLAC__StreamMetadata_VorbisComment_Entry *object_array, unsigned num_comments)
+static void vorbiscomment_entry_array_delete_(FLAC__StreamMetadata_VorbisComment_Entry *object_array, uint32_t num_comments)
 {
-	unsigned i;
+	uint32_t i;
 
-	FLAC__ASSERT(0 != object_array && num_comments > 0);
+	FLAC__ASSERT(object_array != NULL && num_comments > 0);
 
-	for(i = 0; i < num_comments; i++)
-		if(0 != object_array[i].entry)
-			free(object_array[i].entry);
+	for (i = 0; i < num_comments; i++)
+		free(object_array[i].entry);
 
-	if(0 != object_array)
-		free(object_array);
+	free(object_array);
 }
 
-static FLAC__StreamMetadata_VorbisComment_Entry *vorbiscomment_entry_array_copy_(const FLAC__StreamMetadata_VorbisComment_Entry *object_array, unsigned num_comments)
+static FLAC__StreamMetadata_VorbisComment_Entry *vorbiscomment_entry_array_copy_(const FLAC__StreamMetadata_VorbisComment_Entry *object_array, uint32_t num_comments)
 {
 	FLAC__StreamMetadata_VorbisComment_Entry *return_array;
 
-	FLAC__ASSERT(0 != object_array);
+	FLAC__ASSERT(object_array != NULL);
 	FLAC__ASSERT(num_comments > 0);
 
 	return_array = vorbiscomment_entry_array_new_(num_comments);
 
-	if(0 != return_array) {
-		unsigned i;
+	if (return_array != NULL) {
+		uint32_t i;
 
-		for(i = 0; i < num_comments; i++) {
-			if(!copy_vcentry_(return_array+i, object_array+i)) {
+		for (i = 0; i < num_comments; i++) {
+			if (!copy_vcentry_(return_array+i, object_array+i)) {
 				vorbiscomment_entry_array_delete_(return_array, num_comments);
 				return 0;
 			}
@@ -256,18 +252,18 @@
 {
 	FLAC__byte *save;
 
-	FLAC__ASSERT(0 != object);
-	FLAC__ASSERT(0 != dest);
-	FLAC__ASSERT(0 != src);
+	FLAC__ASSERT(object != NULL);
+	FLAC__ASSERT(dest != NULL);
+	FLAC__ASSERT(src != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_VORBIS_COMMENT);
-	FLAC__ASSERT((0 != src->entry && src->length > 0) || (0 == src->entry && src->length == 0));
+	FLAC__ASSERT((src->entry != NULL && src->length > 0) || (src->entry == NULL && src->length == 0));
 
 	save = dest->entry;
 
-	if(0 != src->entry) {
-		if(copy) {
+	if (src->entry != NULL) {
+		if (copy) {
 			/* do the copy first so that if we fail we leave the dest object untouched */
-			if(!copy_vcentry_(dest, src))
+			if (!copy_vcentry_(dest, src))
 				return false;
 		}
 		else {
@@ -281,7 +277,7 @@
 			 * precise, the 'own' flavor would be a separate function with a
 			 * non-const source pointer.  But it's not, so we hack away.
 			 */
-			if(!ensure_null_terminated_((FLAC__byte**)(&src->entry), src->length))
+			if (!ensure_null_terminated_((FLAC__byte**)(&src->entry), src->length))
 				return false;
 			*dest = *src;
 		}
@@ -291,23 +287,22 @@
 		*dest = *src;
 	}
 
-	if(0 != save)
-		free(save);
+	free(save);
 
 	vorbiscomment_calculate_length_(object);
 	return true;
 }
 
-static int vorbiscomment_find_entry_from_(const FLAC__StreamMetadata *object, unsigned offset, const char *field_name, unsigned field_name_length)
+static int vorbiscomment_find_entry_from_(const FLAC__StreamMetadata *object, uint32_t offset, const char *field_name, uint32_t field_name_length)
 {
-	unsigned i;
+	uint32_t i;
 
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_VORBIS_COMMENT);
-	FLAC__ASSERT(0 != field_name);
+	FLAC__ASSERT(field_name != NULL);
 
-	for(i = offset; i < object->data.vorbis_comment.num_comments; i++) {
-		if(FLAC__metadata_object_vorbiscomment_entry_matches(object->data.vorbis_comment.comments[i], field_name, field_name_length))
+	for (i = offset; i < object->data.vorbis_comment.num_comments; i++) {
+		if (FLAC__metadata_object_vorbiscomment_entry_matches(object->data.vorbis_comment.comments[i], field_name, field_name_length))
 			return (int)i;
 	}
 
@@ -316,7 +311,7 @@
 
 static void cuesheet_calculate_length_(FLAC__StreamMetadata *object)
 {
-	unsigned i;
+	uint32_t i;
 
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_CUESHEET);
 
@@ -338,7 +333,7 @@
 		FLAC__STREAM_METADATA_CUESHEET_TRACK_NUM_INDICES_LEN
 	) / 8;
 
-	for(i = 0; i < object->data.cue_sheet.num_tracks; i++) {
+	for (i = 0; i < object->data.cue_sheet.num_tracks; i++) {
 		object->length += object->data.cue_sheet.tracks[i].num_indices * (
 			FLAC__STREAM_METADATA_CUESHEET_INDEX_OFFSET_LEN +
 			FLAC__STREAM_METADATA_CUESHEET_INDEX_NUMBER_LEN +
@@ -347,51 +342,50 @@
 	}
 }
 
-static FLAC__StreamMetadata_CueSheet_Index *cuesheet_track_index_array_new_(unsigned num_indices)
+static FLAC__StreamMetadata_CueSheet_Index *cuesheet_track_index_array_new_(uint32_t num_indices)
 {
 	FLAC__ASSERT(num_indices > 0);
 
 	return safe_calloc_(num_indices, sizeof(FLAC__StreamMetadata_CueSheet_Index));
 }
 
-static FLAC__StreamMetadata_CueSheet_Track *cuesheet_track_array_new_(unsigned num_tracks)
+static FLAC__StreamMetadata_CueSheet_Track *cuesheet_track_array_new_(uint32_t num_tracks)
 {
 	FLAC__ASSERT(num_tracks > 0);
 
 	return safe_calloc_(num_tracks, sizeof(FLAC__StreamMetadata_CueSheet_Track));
 }
 
-static void cuesheet_track_array_delete_(FLAC__StreamMetadata_CueSheet_Track *object_array, unsigned num_tracks)
+static void cuesheet_track_array_delete_(FLAC__StreamMetadata_CueSheet_Track *object_array, uint32_t num_tracks)
 {
-	unsigned i;
+	uint32_t i;
 
-	FLAC__ASSERT(0 != object_array && num_tracks > 0);
+	FLAC__ASSERT(object_array != NULL && num_tracks > 0);
 
-	for(i = 0; i < num_tracks; i++) {
-		if(0 != object_array[i].indices) {
+	for (i = 0; i < num_tracks; i++) {
+		if (object_array[i].indices != 0) {
 			FLAC__ASSERT(object_array[i].num_indices > 0);
 			free(object_array[i].indices);
 		}
 	}
 
-	if(0 != object_array)
-		free(object_array);
+	free(object_array);
 }
 
-static FLAC__StreamMetadata_CueSheet_Track *cuesheet_track_array_copy_(const FLAC__StreamMetadata_CueSheet_Track *object_array, unsigned num_tracks)
+static FLAC__StreamMetadata_CueSheet_Track *cuesheet_track_array_copy_(const FLAC__StreamMetadata_CueSheet_Track *object_array, uint32_t num_tracks)
 {
 	FLAC__StreamMetadata_CueSheet_Track *return_array;
 
-	FLAC__ASSERT(0 != object_array);
+	FLAC__ASSERT(object_array != NULL);
 	FLAC__ASSERT(num_tracks > 0);
 
 	return_array = cuesheet_track_array_new_(num_tracks);
 
-	if(0 != return_array) {
-		unsigned i;
+	if (return_array != NULL) {
+		uint32_t i;
 
-		for(i = 0; i < num_tracks; i++) {
-			if(!copy_track_(return_array+i, object_array+i)) {
+		for (i = 0; i < num_tracks; i++) {
+			if (!copy_track_(return_array+i, object_array+i)) {
 				cuesheet_track_array_delete_(return_array, num_tracks);
 				return 0;
 			}
@@ -405,25 +399,24 @@
 {
 	FLAC__StreamMetadata_CueSheet_Index *save;
 
-	FLAC__ASSERT(0 != object);
-	FLAC__ASSERT(0 != dest);
-	FLAC__ASSERT(0 != src);
+	FLAC__ASSERT(object != NULL);
+	FLAC__ASSERT(dest != NULL);
+	FLAC__ASSERT(src != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_CUESHEET);
-	FLAC__ASSERT((0 != src->indices && src->num_indices > 0) || (0 == src->indices && src->num_indices == 0));
+	FLAC__ASSERT((src->indices != NULL && src->num_indices > 0) || (src->indices == NULL && src->num_indices == 0));
 
 	save = dest->indices;
 
 	/* do the copy first so that if we fail we leave the object untouched */
-	if(copy) {
-		if(!copy_track_(dest, src))
+	if (copy) {
+		if (!copy_track_(dest, src))
 			return false;
 	}
 	else {
 		*dest = *src;
 	}
 
-	if(0 != save)
-		free(save);
+	free(save);
 
 	cuesheet_calculate_length_(object);
 	return true;
@@ -440,11 +433,11 @@
 {
 	FLAC__StreamMetadata *object;
 
-	if(type > FLAC__MAX_METADATA_TYPE)
+	if (type > FLAC__MAX_METADATA_TYPE)
 		return 0;
 
 	object = calloc(1, sizeof(FLAC__StreamMetadata));
-	if(0 != object) {
+	if (object != NULL) {
 		object->is_last = false;
 		object->type = type;
 		switch(type) {
@@ -470,8 +463,8 @@
 				*/
 				break;
 			case FLAC__METADATA_TYPE_VORBIS_COMMENT:
-				object->data.vorbis_comment.vendor_string.length = (unsigned)strlen(FLAC__VENDOR_STRING);
-				if(!copy_bytes_(&object->data.vorbis_comment.vendor_string.entry, (const FLAC__byte*)FLAC__VENDOR_STRING, object->data.vorbis_comment.vendor_string.length+1)) {
+				object->data.vorbis_comment.vendor_string.length = (uint32_t)strlen(FLAC__VENDOR_STRING);
+				if (!copy_bytes_(&object->data.vorbis_comment.vendor_string.entry, (const FLAC__byte*)FLAC__VENDOR_STRING, object->data.vorbis_comment.vendor_string.length+1)) {
 					free(object);
 					return 0;
 				}
@@ -504,13 +497,12 @@
 				object->data.picture.data = 0;
 				*/
 				/* now initialize mime_type and description with empty strings to make things easier on the client */
-				if(!copy_cstring_(&object->data.picture.mime_type, "")) {
+				if (!copy_cstring_(&object->data.picture.mime_type, "")) {
 					free(object);
 					return 0;
 				}
-				if(!copy_cstring_((char**)(&object->data.picture.description), "")) {
-					if(object->data.picture.mime_type)
-						free(object->data.picture.mime_type);
+				if (!copy_cstring_((char**)(&object->data.picture.description), "")) {
+					free(object->data.picture.mime_type);
 					free(object);
 					return 0;
 				}
@@ -531,9 +523,9 @@
 {
 	FLAC__StreamMetadata *to;
 
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 
-	if(0 != (to = FLAC__metadata_object_new(object->type))) {
+	if ((to = FLAC__metadata_object_new(object->type)) != NULL) {
 		to->is_last = object->is_last;
 		to->type = object->type;
 		to->length = object->length;
@@ -544,44 +536,43 @@
 			case FLAC__METADATA_TYPE_PADDING:
 				break;
 			case FLAC__METADATA_TYPE_APPLICATION:
-				if(to->length < FLAC__STREAM_METADATA_APPLICATION_ID_LEN / 8) { /* underflow check */
+				if (to->length < FLAC__STREAM_METADATA_APPLICATION_ID_LEN / 8) { /* underflow check */
 					FLAC__metadata_object_delete(to);
 					return 0;
 				}
 				memcpy(&to->data.application.id, &object->data.application.id, FLAC__STREAM_METADATA_APPLICATION_ID_LEN / 8);
-				if(!copy_bytes_(&to->data.application.data, object->data.application.data, object->length - FLAC__STREAM_METADATA_APPLICATION_ID_LEN / 8)) {
+				if (!copy_bytes_(&to->data.application.data, object->data.application.data, object->length - FLAC__STREAM_METADATA_APPLICATION_ID_LEN / 8)) {
 					FLAC__metadata_object_delete(to);
 					return 0;
 				}
 				break;
 			case FLAC__METADATA_TYPE_SEEKTABLE:
 				to->data.seek_table.num_points = object->data.seek_table.num_points;
-				if(to->data.seek_table.num_points > UINT32_MAX / sizeof(FLAC__StreamMetadata_SeekPoint)) { /* overflow check */
+				if (to->data.seek_table.num_points > UINT32_MAX / sizeof(FLAC__StreamMetadata_SeekPoint)) { /* overflow check */
 					FLAC__metadata_object_delete(to);
 					return 0;
 				}
-				if(!copy_bytes_((FLAC__byte**)&to->data.seek_table.points, (FLAC__byte*)object->data.seek_table.points, object->data.seek_table.num_points * sizeof(FLAC__StreamMetadata_SeekPoint))) {
+				if (!copy_bytes_((FLAC__byte**)&to->data.seek_table.points, (FLAC__byte*)object->data.seek_table.points, object->data.seek_table.num_points * sizeof(FLAC__StreamMetadata_SeekPoint))) {
 					FLAC__metadata_object_delete(to);
 					return 0;
 				}
 				break;
 			case FLAC__METADATA_TYPE_VORBIS_COMMENT:
-				if(0 != to->data.vorbis_comment.vendor_string.entry) {
+				if (to->data.vorbis_comment.vendor_string.entry != NULL) {
 					free(to->data.vorbis_comment.vendor_string.entry);
 					to->data.vorbis_comment.vendor_string.entry = 0;
 				}
-				if(!copy_vcentry_(&to->data.vorbis_comment.vendor_string, &object->data.vorbis_comment.vendor_string)) {
+				if (!copy_vcentry_(&to->data.vorbis_comment.vendor_string, &object->data.vorbis_comment.vendor_string)) {
 					FLAC__metadata_object_delete(to);
 					return 0;
 				}
-				if(object->data.vorbis_comment.num_comments == 0) {
-					FLAC__ASSERT(0 == object->data.vorbis_comment.comments);
+				if (object->data.vorbis_comment.num_comments == 0) {
 					to->data.vorbis_comment.comments = 0;
 				}
 				else {
-					FLAC__ASSERT(0 != object->data.vorbis_comment.comments);
 					to->data.vorbis_comment.comments = vorbiscomment_entry_array_copy_(object->data.vorbis_comment.comments, object->data.vorbis_comment.num_comments);
-					if(0 == to->data.vorbis_comment.comments) {
+					if (to->data.vorbis_comment.comments == NULL) {
+						to->data.vorbis_comment.num_comments = 0;
 						FLAC__metadata_object_delete(to);
 						return 0;
 					}
@@ -590,13 +581,13 @@
 				break;
 			case FLAC__METADATA_TYPE_CUESHEET:
 				memcpy(&to->data.cue_sheet, &object->data.cue_sheet, sizeof(FLAC__StreamMetadata_CueSheet));
-				if(object->data.cue_sheet.num_tracks == 0) {
-					FLAC__ASSERT(0 == object->data.cue_sheet.tracks);
+				if (object->data.cue_sheet.num_tracks == 0) {
+					FLAC__ASSERT(object->data.cue_sheet.tracks == NULL);
 				}
 				else {
-					FLAC__ASSERT(0 != object->data.cue_sheet.tracks);
+					FLAC__ASSERT(object->data.cue_sheet.tracks != 0);
 					to->data.cue_sheet.tracks = cuesheet_track_array_copy_(object->data.cue_sheet.tracks, object->data.cue_sheet.num_tracks);
-					if(0 == to->data.cue_sheet.tracks) {
+					if (to->data.cue_sheet.tracks == NULL) {
 						FLAC__metadata_object_delete(to);
 						return 0;
 					}
@@ -604,11 +595,11 @@
 				break;
 			case FLAC__METADATA_TYPE_PICTURE:
 				to->data.picture.type = object->data.picture.type;
-				if(!copy_cstring_(&to->data.picture.mime_type, object->data.picture.mime_type)) {
+				if (!copy_cstring_(&to->data.picture.mime_type, object->data.picture.mime_type)) {
 					FLAC__metadata_object_delete(to);
 					return 0;
 				}
-				if(!copy_cstring_((char**)(&to->data.picture.description), (const char*)object->data.picture.description)) {
+				if (!copy_cstring_((char**)(&to->data.picture.description), (const char*)object->data.picture.description)) {
 					FLAC__metadata_object_delete(to);
 					return 0;
 				}
@@ -617,13 +608,13 @@
 				to->data.picture.depth = object->data.picture.depth;
 				to->data.picture.colors = object->data.picture.colors;
 				to->data.picture.data_length = object->data.picture.data_length;
-				if(!copy_bytes_((&to->data.picture.data), object->data.picture.data, object->data.picture.data_length)) {
+				if (!copy_bytes_((&to->data.picture.data), object->data.picture.data, object->data.picture.data_length)) {
 					FLAC__metadata_object_delete(to);
 					return 0;
 				}
 				break;
 			default:
-				if(!copy_bytes_(&to->data.unknown.data, object->data.unknown.data, object->length)) {
+				if (!copy_bytes_(&to->data.unknown.data, object->data.unknown.data, object->length)) {
 					FLAC__metadata_object_delete(to);
 					return 0;
 				}
@@ -636,58 +627,62 @@
 
 void FLAC__metadata_object_delete_data(FLAC__StreamMetadata *object)
 {
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 
 	switch(object->type) {
 		case FLAC__METADATA_TYPE_STREAMINFO:
 		case FLAC__METADATA_TYPE_PADDING:
 			break;
 		case FLAC__METADATA_TYPE_APPLICATION:
-			if(0 != object->data.application.data) {
+			if (object->data.application.data != NULL) {
 				free(object->data.application.data);
-				object->data.application.data = 0;
+				object->data.application.data = NULL;
 			}
 			break;
 		case FLAC__METADATA_TYPE_SEEKTABLE:
-			if(0 != object->data.seek_table.points) {
+			if (object->data.seek_table.points != NULL) {
 				free(object->data.seek_table.points);
-				object->data.seek_table.points = 0;
+				object->data.seek_table.points = NULL;
 			}
 			break;
 		case FLAC__METADATA_TYPE_VORBIS_COMMENT:
-			if(0 != object->data.vorbis_comment.vendor_string.entry) {
+			if (object->data.vorbis_comment.vendor_string.entry != NULL) {
 				free(object->data.vorbis_comment.vendor_string.entry);
 				object->data.vorbis_comment.vendor_string.entry = 0;
 			}
-			if(0 != object->data.vorbis_comment.comments) {
+			if (object->data.vorbis_comment.comments != NULL) {
 				FLAC__ASSERT(object->data.vorbis_comment.num_comments > 0);
 				vorbiscomment_entry_array_delete_(object->data.vorbis_comment.comments, object->data.vorbis_comment.num_comments);
+				object->data.vorbis_comment.comments = NULL;
+				object->data.vorbis_comment.num_comments = 0;
 			}
 			break;
 		case FLAC__METADATA_TYPE_CUESHEET:
-			if(0 != object->data.cue_sheet.tracks) {
+			if (object->data.cue_sheet.tracks != NULL) {
 				FLAC__ASSERT(object->data.cue_sheet.num_tracks > 0);
 				cuesheet_track_array_delete_(object->data.cue_sheet.tracks, object->data.cue_sheet.num_tracks);
+				object->data.cue_sheet.tracks = NULL;
+				object->data.cue_sheet.num_tracks = 0;
 			}
 			break;
 		case FLAC__METADATA_TYPE_PICTURE:
-			if(0 != object->data.picture.mime_type) {
+			if (object->data.picture.mime_type != NULL) {
 				free(object->data.picture.mime_type);
-				object->data.picture.mime_type = 0;
+				object->data.picture.mime_type = NULL;
 			}
-			if(0 != object->data.picture.description) {
+			if (object->data.picture.description != NULL) {
 				free(object->data.picture.description);
-				object->data.picture.description = 0;
+				object->data.picture.description = NULL;
 			}
-			if(0 != object->data.picture.data) {
+			if (object->data.picture.data != NULL) {
 				free(object->data.picture.data);
-				object->data.picture.data = 0;
+				object->data.picture.data = NULL;
 			}
 			break;
 		default:
-			if(0 != object->data.unknown.data) {
+			if (object->data.unknown.data != NULL) {
 				free(object->data.unknown.data);
-				object->data.unknown.data = 0;
+				object->data.unknown.data = NULL;
 			}
 			break;
 	}
@@ -701,58 +696,58 @@
 
 static FLAC__bool compare_block_data_streaminfo_(const FLAC__StreamMetadata_StreamInfo *block1, const FLAC__StreamMetadata_StreamInfo *block2)
 {
-	if(block1->min_blocksize != block2->min_blocksize)
+	if (block1->min_blocksize != block2->min_blocksize)
 		return false;
-	if(block1->max_blocksize != block2->max_blocksize)
+	if (block1->max_blocksize != block2->max_blocksize)
 		return false;
-	if(block1->min_framesize != block2->min_framesize)
+	if (block1->min_framesize != block2->min_framesize)
 		return false;
-	if(block1->max_framesize != block2->max_framesize)
+	if (block1->max_framesize != block2->max_framesize)
 		return false;
-	if(block1->sample_rate != block2->sample_rate)
+	if (block1->sample_rate != block2->sample_rate)
 		return false;
-	if(block1->channels != block2->channels)
+	if (block1->channels != block2->channels)
 		return false;
-	if(block1->bits_per_sample != block2->bits_per_sample)
+	if (block1->bits_per_sample != block2->bits_per_sample)
 		return false;
-	if(block1->total_samples != block2->total_samples)
+	if (block1->total_samples != block2->total_samples)
 		return false;
-	if(0 != memcmp(block1->md5sum, block2->md5sum, 16))
+	if (memcmp(block1->md5sum, block2->md5sum, 16) != 0)
 		return false;
 	return true;
 }
 
-static FLAC__bool compare_block_data_application_(const FLAC__StreamMetadata_Application *block1, const FLAC__StreamMetadata_Application *block2, unsigned block_length)
+static FLAC__bool compare_block_data_application_(const FLAC__StreamMetadata_Application *block1, const FLAC__StreamMetadata_Application *block2, uint32_t block_length)
 {
-	FLAC__ASSERT(0 != block1);
-	FLAC__ASSERT(0 != block2);
+	FLAC__ASSERT(block1 != NULL);
+	FLAC__ASSERT(block2 != NULL);
 	FLAC__ASSERT(block_length >= sizeof(block1->id));
 
-	if(0 != memcmp(block1->id, block2->id, sizeof(block1->id)))
+	if (memcmp(block1->id, block2->id, sizeof(block1->id)) != 0)
 		return false;
-	if(0 != block1->data && 0 != block2->data)
-		return 0 == memcmp(block1->data, block2->data, block_length - sizeof(block1->id));
+	if (block1->data != NULL && block2->data != NULL)
+		return memcmp(block1->data, block2->data, block_length - sizeof(block1->id)) == 0;
 	else
 		return block1->data == block2->data;
 }
 
 static FLAC__bool compare_block_data_seektable_(const FLAC__StreamMetadata_SeekTable *block1, const FLAC__StreamMetadata_SeekTable *block2)
 {
-	unsigned i;
+	uint32_t i;
 
-	FLAC__ASSERT(0 != block1);
-	FLAC__ASSERT(0 != block2);
+	FLAC__ASSERT(block1 != NULL);
+	FLAC__ASSERT(block2 != NULL);
 
-	if(block1->num_points != block2->num_points)
+	if (block1->num_points != block2->num_points)
 		return false;
 
-	if(0 != block1->points && 0 != block2->points) {
-		for(i = 0; i < block1->num_points; i++) {
-			if(block1->points[i].sample_number != block2->points[i].sample_number)
+	if (block1->points != NULL && block2->points != NULL) {
+		for (i = 0; i < block1->num_points; i++) {
+			if (block1->points[i].sample_number != block2->points[i].sample_number)
 				return false;
-			if(block1->points[i].stream_offset != block2->points[i].stream_offset)
+			if (block1->points[i].stream_offset != block2->points[i].stream_offset)
 				return false;
-			if(block1->points[i].frame_samples != block2->points[i].frame_samples)
+			if (block1->points[i].frame_samples != block2->points[i].frame_samples)
 				return false;
 		}
 		return true;
@@ -763,27 +758,27 @@
 
 static FLAC__bool compare_block_data_vorbiscomment_(const FLAC__StreamMetadata_VorbisComment *block1, const FLAC__StreamMetadata_VorbisComment *block2)
 {
-	unsigned i;
+	uint32_t i;
 
-	if(block1->vendor_string.length != block2->vendor_string.length)
+	if (block1->vendor_string.length != block2->vendor_string.length)
 		return false;
 
-	if(0 != block1->vendor_string.entry && 0 != block2->vendor_string.entry) {
-		if(0 != memcmp(block1->vendor_string.entry, block2->vendor_string.entry, block1->vendor_string.length))
+	if (block1->vendor_string.entry != NULL && block2->vendor_string.entry != NULL) {
+		if (memcmp(block1->vendor_string.entry, block2->vendor_string.entry, block1->vendor_string.length) != 0)
 			return false;
 	}
-	else if(block1->vendor_string.entry != block2->vendor_string.entry)
+	else if (block1->vendor_string.entry != block2->vendor_string.entry)
 		return false;
 
-	if(block1->num_comments != block2->num_comments)
+	if (block1->num_comments != block2->num_comments)
 		return false;
 
-	for(i = 0; i < block1->num_comments; i++) {
-		if(0 != block1->comments[i].entry && 0 != block2->comments[i].entry) {
-			if(0 != memcmp(block1->comments[i].entry, block2->comments[i].entry, block1->comments[i].length))
+	for (i = 0; i < block1->num_comments; i++) {
+		if (block1->comments[i].entry != NULL && block2->comments[i].entry != NULL) {
+			if (memcmp(block1->comments[i].entry, block2->comments[i].entry, block1->comments[i].length) != 0)
 				return false;
 		}
-		else if(block1->comments[i].entry != block2->comments[i].entry)
+		else if (block1->comments[i].entry != block2->comments[i].entry)
 			return false;
 	}
 	return true;
@@ -791,99 +786,99 @@
 
 static FLAC__bool compare_block_data_cuesheet_(const FLAC__StreamMetadata_CueSheet *block1, const FLAC__StreamMetadata_CueSheet *block2)
 {
-	unsigned i, j;
+	uint32_t i, j;
 
-	if(0 != strcmp(block1->media_catalog_number, block2->media_catalog_number))
+	if (strcmp(block1->media_catalog_number, block2->media_catalog_number) != 0)
 		return false;
 
-	if(block1->lead_in != block2->lead_in)
+	if (block1->lead_in != block2->lead_in)
 		return false;
 
-	if(block1->is_cd != block2->is_cd)
+	if (block1->is_cd != block2->is_cd)
 		return false;
 
-	if(block1->num_tracks != block2->num_tracks)
+	if (block1->num_tracks != block2->num_tracks)
 		return false;
 
-	if(0 != block1->tracks && 0 != block2->tracks) {
+	if (block1->tracks != NULL && block2->tracks != NULL) {
 		FLAC__ASSERT(block1->num_tracks > 0);
-		for(i = 0; i < block1->num_tracks; i++) {
-			if(block1->tracks[i].offset != block2->tracks[i].offset)
+		for (i = 0; i < block1->num_tracks; i++) {
+			if (block1->tracks[i].offset != block2->tracks[i].offset)
 				return false;
-			if(block1->tracks[i].number != block2->tracks[i].number)
+			if (block1->tracks[i].number != block2->tracks[i].number)
 				return false;
-			if(0 != memcmp(block1->tracks[i].isrc, block2->tracks[i].isrc, sizeof(block1->tracks[i].isrc)))
+			if (memcmp(block1->tracks[i].isrc, block2->tracks[i].isrc, sizeof(block1->tracks[i].isrc)) != 0)
 				return false;
-			if(block1->tracks[i].type != block2->tracks[i].type)
+			if (block1->tracks[i].type != block2->tracks[i].type)
 				return false;
-			if(block1->tracks[i].pre_emphasis != block2->tracks[i].pre_emphasis)
+			if (block1->tracks[i].pre_emphasis != block2->tracks[i].pre_emphasis)
 				return false;
-			if(block1->tracks[i].num_indices != block2->tracks[i].num_indices)
+			if (block1->tracks[i].num_indices != block2->tracks[i].num_indices)
 				return false;
-			if(0 != block1->tracks[i].indices && 0 != block2->tracks[i].indices) {
+			if (block1->tracks[i].indices != NULL && block2->tracks[i].indices != NULL) {
 				FLAC__ASSERT(block1->tracks[i].num_indices > 0);
-				for(j = 0; j < block1->tracks[i].num_indices; j++) {
-					if(block1->tracks[i].indices[j].offset != block2->tracks[i].indices[j].offset)
+				for (j = 0; j < block1->tracks[i].num_indices; j++) {
+					if (block1->tracks[i].indices[j].offset != block2->tracks[i].indices[j].offset)
 						return false;
-					if(block1->tracks[i].indices[j].number != block2->tracks[i].indices[j].number)
+					if (block1->tracks[i].indices[j].number != block2->tracks[i].indices[j].number)
 						return false;
 				}
 			}
-			else if(block1->tracks[i].indices != block2->tracks[i].indices)
+			else if (block1->tracks[i].indices != block2->tracks[i].indices)
 				return false;
 		}
 	}
-	else if(block1->tracks != block2->tracks)
+	else if (block1->tracks != block2->tracks)
 		return false;
 	return true;
 }
 
 static FLAC__bool compare_block_data_picture_(const FLAC__StreamMetadata_Picture *block1, const FLAC__StreamMetadata_Picture *block2)
 {
-	if(block1->type != block2->type)
+	if (block1->type != block2->type)
 		return false;
-	if(block1->mime_type != block2->mime_type && (0 == block1->mime_type || 0 == block2->mime_type || strcmp(block1->mime_type, block2->mime_type)))
+	if (block1->mime_type != block2->mime_type && (block1->mime_type == 0 || block2->mime_type == 0 || strcmp(block1->mime_type, block2->mime_type)))
 		return false;
-	if(block1->description != block2->description && (0 == block1->description || 0 == block2->description || strcmp((const char *)block1->description, (const char *)block2->description)))
+	if (block1->description != block2->description && (block1->description == 0 || block2->description == 0 || strcmp((const char *)block1->description, (const char *)block2->description)))
 		return false;
-	if(block1->width != block2->width)
+	if (block1->width != block2->width)
 		return false;
-	if(block1->height != block2->height)
+	if (block1->height != block2->height)
 		return false;
-	if(block1->depth != block2->depth)
+	if (block1->depth != block2->depth)
 		return false;
-	if(block1->colors != block2->colors)
+	if (block1->colors != block2->colors)
 		return false;
-	if(block1->data_length != block2->data_length)
+	if (block1->data_length != block2->data_length)
 		return false;
-	if(block1->data != block2->data && (0 == block1->data || 0 == block2->data || memcmp(block1->data, block2->data, block1->data_length)))
+	if (block1->data != block2->data && (block1->data == NULL || block2->data == NULL || memcmp(block1->data, block2->data, block1->data_length)))
 		return false;
 	return true;
 }
 
-static FLAC__bool compare_block_data_unknown_(const FLAC__StreamMetadata_Unknown *block1, const FLAC__StreamMetadata_Unknown *block2, unsigned block_length)
+static FLAC__bool compare_block_data_unknown_(const FLAC__StreamMetadata_Unknown *block1, const FLAC__StreamMetadata_Unknown *block2, uint32_t block_length)
 {
-	FLAC__ASSERT(0 != block1);
-	FLAC__ASSERT(0 != block2);
+	FLAC__ASSERT(block1 != NULL);
+	FLAC__ASSERT(block2 != NULL);
 
-	if(0 != block1->data && 0 != block2->data)
-		return 0 == memcmp(block1->data, block2->data, block_length);
+	if (block1->data != NULL && block2->data != NULL)
+		return memcmp(block1->data, block2->data, block_length) == 0;
 	else
 		return block1->data == block2->data;
 }
 
 FLAC_API FLAC__bool FLAC__metadata_object_is_equal(const FLAC__StreamMetadata *block1, const FLAC__StreamMetadata *block2)
 {
-	FLAC__ASSERT(0 != block1);
-	FLAC__ASSERT(0 != block2);
+	FLAC__ASSERT(block1 != NULL);
+	FLAC__ASSERT(block2 != NULL);
 
-	if(block1->type != block2->type) {
+	if (block1->type != block2->type) {
 		return false;
 	}
-	if(block1->is_last != block2->is_last) {
+	if (block1->is_last != block2->is_last) {
 		return false;
 	}
-	if(block1->length != block2->length) {
+	if (block1->length != block2->length) {
 		return false;
 	}
 	switch(block1->type) {
@@ -906,42 +901,41 @@
 	}
 }
 
-FLAC_API FLAC__bool FLAC__metadata_object_application_set_data(FLAC__StreamMetadata *object, FLAC__byte *data, unsigned length, FLAC__bool copy)
+FLAC_API FLAC__bool FLAC__metadata_object_application_set_data(FLAC__StreamMetadata *object, FLAC__byte *data, uint32_t length, FLAC__bool copy)
 {
 	FLAC__byte *save;
 
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_APPLICATION);
-	FLAC__ASSERT((0 != data && length > 0) || (0 == data && length == 0 && copy == false));
+	FLAC__ASSERT((data != NULL && length > 0) || (data == NULL && length == 0 && copy == false));
 
 	save = object->data.application.data;
 
 	/* do the copy first so that if we fail we leave the object untouched */
-	if(copy) {
-		if(!copy_bytes_(&object->data.application.data, data, length))
+	if (copy) {
+		if (!copy_bytes_(&object->data.application.data, data, length))
 			return false;
 	}
 	else {
 		object->data.application.data = data;
 	}
 
-	if(0 != save)
-		free(save);
+	free(save);
 
 	object->length = FLAC__STREAM_METADATA_APPLICATION_ID_LEN / 8 + length;
 	return true;
 }
 
-FLAC_API FLAC__bool FLAC__metadata_object_seektable_resize_points(FLAC__StreamMetadata *object, unsigned new_num_points)
+FLAC_API FLAC__bool FLAC__metadata_object_seektable_resize_points(FLAC__StreamMetadata *object, uint32_t new_num_points)
 {
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_SEEKTABLE);
 
-	if(0 == object->data.seek_table.points) {
+	if (object->data.seek_table.points == 0) {
 		FLAC__ASSERT(object->data.seek_table.num_points == 0);
-		if(0 == new_num_points)
+		if (new_num_points == 0)
 			return true;
-		else if(0 == (object->data.seek_table.points = seekpoint_array_new_(new_num_points)))
+		else if ((object->data.seek_table.points = seekpoint_array_new_(new_num_points)) == 0)
 			return false;
 	}
 	else {
@@ -949,22 +943,27 @@
 		const size_t new_size = new_num_points * sizeof(FLAC__StreamMetadata_SeekPoint);
 
 		/* overflow check */
-		if(new_num_points > UINT32_MAX / sizeof(FLAC__StreamMetadata_SeekPoint))
+		if (new_num_points > UINT32_MAX / sizeof(FLAC__StreamMetadata_SeekPoint))
 			return false;
 
 		FLAC__ASSERT(object->data.seek_table.num_points > 0);
 
-		if(new_size == 0) {
+		if (new_size == 0) {
 			free(object->data.seek_table.points);
 			object->data.seek_table.points = 0;
 		}
-		else if(0 == (object->data.seek_table.points = realloc(object->data.seek_table.points, new_size)))
-			return false;
+		else {
+			/* Leave object->data.seek_table.points untouched if realloc fails */
+			FLAC__StreamMetadata_SeekPoint *tmpptr;
+			if ((tmpptr = realloc(object->data.seek_table.points, new_size)) == NULL)
+				return false;
+			object->data.seek_table.points = tmpptr;
+		}
 
 		/* if growing, set new elements to placeholders */
-		if(new_size > old_size) {
-			unsigned i;
-			for(i = object->data.seek_table.num_points; i < new_num_points; i++) {
+		if (new_size > old_size) {
+			uint32_t i;
+			for (i = object->data.seek_table.num_points; i < new_num_points; i++) {
 				object->data.seek_table.points[i].sample_number = FLAC__STREAM_METADATA_SEEKPOINT_PLACEHOLDER;
 				object->data.seek_table.points[i].stream_offset = 0;
 				object->data.seek_table.points[i].frame_samples = 0;
@@ -978,28 +977,28 @@
 	return true;
 }
 
-FLAC_API void FLAC__metadata_object_seektable_set_point(FLAC__StreamMetadata *object, unsigned point_num, FLAC__StreamMetadata_SeekPoint point)
+FLAC_API void FLAC__metadata_object_seektable_set_point(FLAC__StreamMetadata *object, uint32_t point_num, FLAC__StreamMetadata_SeekPoint point)
 {
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_SEEKTABLE);
 	FLAC__ASSERT(point_num < object->data.seek_table.num_points);
 
 	object->data.seek_table.points[point_num] = point;
 }
 
-FLAC_API FLAC__bool FLAC__metadata_object_seektable_insert_point(FLAC__StreamMetadata *object, unsigned point_num, FLAC__StreamMetadata_SeekPoint point)
+FLAC_API FLAC__bool FLAC__metadata_object_seektable_insert_point(FLAC__StreamMetadata *object, uint32_t point_num, FLAC__StreamMetadata_SeekPoint point)
 {
 	int i;
 
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_SEEKTABLE);
 	FLAC__ASSERT(point_num <= object->data.seek_table.num_points);
 
-	if(!FLAC__metadata_object_seektable_resize_points(object, object->data.seek_table.num_points+1))
+	if (!FLAC__metadata_object_seektable_resize_points(object, object->data.seek_table.num_points+1))
 		return false;
 
 	/* move all points >= point_num forward one space */
-	for(i = (int)object->data.seek_table.num_points-1; i > (int)point_num; i--)
+	for (i = (int)object->data.seek_table.num_points-1; i > (int)point_num; i--)
 		object->data.seek_table.points[i] = object->data.seek_table.points[i-1];
 
 	FLAC__metadata_object_seektable_set_point(object, point_num, point);
@@ -1007,16 +1006,16 @@
 	return true;
 }
 
-FLAC_API FLAC__bool FLAC__metadata_object_seektable_delete_point(FLAC__StreamMetadata *object, unsigned point_num)
+FLAC_API FLAC__bool FLAC__metadata_object_seektable_delete_point(FLAC__StreamMetadata *object, uint32_t point_num)
 {
-	unsigned i;
+	uint32_t i;
 
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_SEEKTABLE);
 	FLAC__ASSERT(point_num < object->data.seek_table.num_points);
 
 	/* move all points > point_num backward one space */
-	for(i = point_num; i < object->data.seek_table.num_points-1; i++)
+	for (i = point_num; i < object->data.seek_table.num_points-1; i++)
 		object->data.seek_table.points[i] = object->data.seek_table.points[i+1];
 
 	return FLAC__metadata_object_seektable_resize_points(object, object->data.seek_table.num_points-1);
@@ -1024,18 +1023,18 @@
 
 FLAC_API FLAC__bool FLAC__metadata_object_seektable_is_legal(const FLAC__StreamMetadata *object)
 {
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_SEEKTABLE);
 
 	return FLAC__format_seektable_is_legal(&object->data.seek_table);
 }
 
-FLAC_API FLAC__bool FLAC__metadata_object_seektable_template_append_placeholders(FLAC__StreamMetadata *object, unsigned num)
+FLAC_API FLAC__bool FLAC__metadata_object_seektable_template_append_placeholders(FLAC__StreamMetadata *object, uint32_t num)
 {
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_SEEKTABLE);
 
-	if(num > 0)
+	if (num > 0)
 		/* WATCHOUT: we rely on the fact that growing the array adds PLACEHOLDERS at the end */
 		return FLAC__metadata_object_seektable_resize_points(object, object->data.seek_table.num_points + num);
 	else
@@ -1046,12 +1045,12 @@
 {
 	FLAC__StreamMetadata_SeekTable *seek_table;
 
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_SEEKTABLE);
 
 	seek_table = &object->data.seek_table;
 
-	if(!FLAC__metadata_object_seektable_resize_points(object, seek_table->num_points + 1))
+	if (!FLAC__metadata_object_seektable_resize_points(object, seek_table->num_points + 1))
 		return false;
 
 	seek_table->points[seek_table->num_points - 1].sample_number = sample_number;
@@ -1061,22 +1060,22 @@
 	return true;
 }
 
-FLAC_API FLAC__bool FLAC__metadata_object_seektable_template_append_points(FLAC__StreamMetadata *object, FLAC__uint64 sample_numbers[], unsigned num)
+FLAC_API FLAC__bool FLAC__metadata_object_seektable_template_append_points(FLAC__StreamMetadata *object, FLAC__uint64 sample_numbers[], uint32_t num)
 {
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_SEEKTABLE);
-	FLAC__ASSERT(0 != sample_numbers || num == 0);
+	FLAC__ASSERT(sample_numbers != 0 || num == 0);
 
-	if(num > 0) {
+	if (num > 0) {
 		FLAC__StreamMetadata_SeekTable *seek_table = &object->data.seek_table;
-		unsigned i, j;
+		uint32_t i, j;
 
 		i = seek_table->num_points;
 
-		if(!FLAC__metadata_object_seektable_resize_points(object, seek_table->num_points + num))
+		if (!FLAC__metadata_object_seektable_resize_points(object, seek_table->num_points + num))
 			return false;
 
-		for(j = 0; j < num; i++, j++) {
+		for (j = 0; j < num; i++, j++) {
 			seek_table->points[i].sample_number = sample_numbers[j];
 			seek_table->points[i].stream_offset = 0;
 			seek_table->points[i].frame_samples = 0;
@@ -1086,22 +1085,22 @@
 	return true;
 }
 
-FLAC_API FLAC__bool FLAC__metadata_object_seektable_template_append_spaced_points(FLAC__StreamMetadata *object, unsigned num, FLAC__uint64 total_samples)
+FLAC_API FLAC__bool FLAC__metadata_object_seektable_template_append_spaced_points(FLAC__StreamMetadata *object, uint32_t num, FLAC__uint64 total_samples)
 {
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_SEEKTABLE);
 	FLAC__ASSERT(total_samples > 0);
 
-	if(num > 0 && total_samples > 0) {
+	if (num > 0 && total_samples > 0) {
 		FLAC__StreamMetadata_SeekTable *seek_table = &object->data.seek_table;
-		unsigned i, j;
+		uint32_t i, j;
 
 		i = seek_table->num_points;
 
-		if(!FLAC__metadata_object_seektable_resize_points(object, seek_table->num_points + num))
+		if (!FLAC__metadata_object_seektable_resize_points(object, seek_table->num_points + num))
 			return false;
 
-		for(j = 0; j < num; i++, j++) {
+		for (j = 0; j < num; i++, j++) {
 			seek_table->points[i].sample_number = total_samples * (FLAC__uint64)j / (FLAC__uint64)num;
 			seek_table->points[i].stream_offset = 0;
 			seek_table->points[i].frame_samples = 0;
@@ -1111,30 +1110,37 @@
 	return true;
 }
 
-FLAC_API FLAC__bool FLAC__metadata_object_seektable_template_append_spaced_points_by_samples(FLAC__StreamMetadata *object, unsigned samples, FLAC__uint64 total_samples)
+FLAC_API FLAC__bool FLAC__metadata_object_seektable_template_append_spaced_points_by_samples(FLAC__StreamMetadata *object, uint32_t samples, FLAC__uint64 total_samples)
 {
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_SEEKTABLE);
 	FLAC__ASSERT(samples > 0);
 	FLAC__ASSERT(total_samples > 0);
 
-	if(samples > 0 && total_samples > 0) {
+	if (samples > 0 && total_samples > 0) {
 		FLAC__StreamMetadata_SeekTable *seek_table = &object->data.seek_table;
-		unsigned i, j;
+		uint32_t i, j;
 		FLAC__uint64 num, sample;
 
 		num = 1 + total_samples / samples; /* 1+ for the first sample at 0 */
 		/* now account for the fact that we don't place a seekpoint at "total_samples" since samples are number from 0: */
-		if(total_samples % samples == 0)
+		if (total_samples % samples == 0)
 			num--;
 
+		/* Put a strict upper bound on the number of allowed seek points. */
+		if (num > 32768) {
+			/* Set the bound and recalculate samples accordingly. */
+			num = 32768;
+			samples = (uint32_t)(total_samples / num);
+		}
+
 		i = seek_table->num_points;
 
-		if(!FLAC__metadata_object_seektable_resize_points(object, seek_table->num_points + (unsigned)num))
+		if (!FLAC__metadata_object_seektable_resize_points(object, seek_table->num_points + (uint32_t)num))
 			return false;
 
 		sample = 0;
-		for(j = 0; j < num; i++, j++, sample += samples) {
+		for (j = 0; j < num; i++, j++, sample += samples) {
 			seek_table->points[i].sample_number = sample;
 			seek_table->points[i].stream_offset = 0;
 			seek_table->points[i].frame_samples = 0;
@@ -1146,9 +1152,9 @@
 
 FLAC_API FLAC__bool FLAC__metadata_object_seektable_template_sort(FLAC__StreamMetadata *object, FLAC__bool compact)
 {
-	unsigned unique;
+	uint32_t unique;
 
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_SEEKTABLE);
 
 	unique = FLAC__format_seektable_sort(&object->data.seek_table);
@@ -1158,51 +1164,76 @@
 
 FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_set_vendor_string(FLAC__StreamMetadata *object, FLAC__StreamMetadata_VorbisComment_Entry entry, FLAC__bool copy)
 {
-	if(!FLAC__format_vorbiscomment_entry_value_is_legal(entry.entry, entry.length))
+	if (!FLAC__format_vorbiscomment_entry_value_is_legal(entry.entry, entry.length))
 		return false;
 	return vorbiscomment_set_entry_(object, &object->data.vorbis_comment.vendor_string, &entry, copy);
 }
 
-FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_resize_comments(FLAC__StreamMetadata *object, unsigned new_num_comments)
+FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_resize_comments(FLAC__StreamMetadata *object, uint32_t new_num_comments)
 {
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_VORBIS_COMMENT);
 
-	if(0 == object->data.vorbis_comment.comments) {
+	if (object->data.vorbis_comment.comments == NULL) {
 		FLAC__ASSERT(object->data.vorbis_comment.num_comments == 0);
-		if(0 == new_num_comments)
+		if (new_num_comments == 0)
 			return true;
-		else if(0 == (object->data.vorbis_comment.comments = vorbiscomment_entry_array_new_(new_num_comments)))
-			return false;
+		else {
+			uint32_t i;
+			if ((object->data.vorbis_comment.comments = vorbiscomment_entry_array_new_(new_num_comments)) == NULL)
+				return false;
+			for (i = 0; i < new_num_comments; i++) {
+				object->data.vorbis_comment.comments[i].length = 0;
+				if ((object->data.vorbis_comment.comments[i].entry = safe_malloc_(1)) == NULL) {
+					object->data.vorbis_comment.num_comments = i+1;
+					return false;
+				}
+				object->data.vorbis_comment.comments[i].entry[0] = '\0';
+			}
+		}
 	}
 	else {
 		const size_t old_size = object->data.vorbis_comment.num_comments * sizeof(FLAC__StreamMetadata_VorbisComment_Entry);
 		const size_t new_size = new_num_comments * sizeof(FLAC__StreamMetadata_VorbisComment_Entry);
 
 		/* overflow check */
-		if(new_num_comments > UINT32_MAX / sizeof(FLAC__StreamMetadata_VorbisComment_Entry))
+		if (new_num_comments > UINT32_MAX / sizeof(FLAC__StreamMetadata_VorbisComment_Entry))
 			return false;
 
 		FLAC__ASSERT(object->data.vorbis_comment.num_comments > 0);
 
 		/* if shrinking, free the truncated entries */
-		if(new_num_comments < object->data.vorbis_comment.num_comments) {
-			unsigned i;
-			for(i = new_num_comments; i < object->data.vorbis_comment.num_comments; i++)
-				if(0 != object->data.vorbis_comment.comments[i].entry)
+		if (new_num_comments < object->data.vorbis_comment.num_comments) {
+			uint32_t i;
+			for (i = new_num_comments; i < object->data.vorbis_comment.num_comments; i++)
+				if (object->data.vorbis_comment.comments[i].entry != NULL)
 					free(object->data.vorbis_comment.comments[i].entry);
 		}
 
-		if(new_size == 0) {
+		if (new_size == 0) {
 			free(object->data.vorbis_comment.comments);
 			object->data.vorbis_comment.comments = 0;
 		}
-		else if(0 == (object->data.vorbis_comment.comments = realloc(object->data.vorbis_comment.comments, new_size)))
-			return false;
+		else {
+			/* Leave object->data.vorbis_comment.comments untouched if realloc fails */
+			FLAC__StreamMetadata_VorbisComment_Entry *tmpptr;
+			if ((tmpptr = realloc(object->data.vorbis_comment.comments, new_size)) == NULL)
+				return false;
+			object->data.vorbis_comment.comments = tmpptr;
+		}
 
 		/* if growing, zero all the length/pointers of new elements */
-		if(new_size > old_size)
-			memset(object->data.vorbis_comment.comments + object->data.vorbis_comment.num_comments, 0, new_size - old_size);
+		if (new_size > old_size) {
+			uint32_t i;
+			for (i = object->data.vorbis_comment.num_comments; i < new_num_comments; i++) {
+				object->data.vorbis_comment.comments[i].length = 0;
+				if ((object->data.vorbis_comment.comments[i].entry = safe_malloc_(1)) == NULL) {
+					object->data.vorbis_comment.num_comments = i+1;
+					return false;
+				}
+				object->data.vorbis_comment.comments[i].entry[0] = '\0';
+			}
+		}
 	}
 
 	object->data.vorbis_comment.num_comments = new_num_comments;
@@ -1211,52 +1242,54 @@
 	return true;
 }
 
-FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_set_comment(FLAC__StreamMetadata *object, unsigned comment_num, FLAC__StreamMetadata_VorbisComment_Entry entry, FLAC__bool copy)
+FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_set_comment(FLAC__StreamMetadata *object, uint32_t comment_num, FLAC__StreamMetadata_VorbisComment_Entry entry, FLAC__bool copy)
 {
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(comment_num < object->data.vorbis_comment.num_comments);
 
-	if(!FLAC__format_vorbiscomment_entry_is_legal(entry.entry, entry.length))
+	if (!FLAC__format_vorbiscomment_entry_is_legal(entry.entry, entry.length))
 		return false;
 	return vorbiscomment_set_entry_(object, &object->data.vorbis_comment.comments[comment_num], &entry, copy);
 }
 
-FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_insert_comment(FLAC__StreamMetadata *object, unsigned comment_num, FLAC__StreamMetadata_VorbisComment_Entry entry, FLAC__bool copy)
+FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_insert_comment(FLAC__StreamMetadata *object, uint32_t comment_num, FLAC__StreamMetadata_VorbisComment_Entry entry, FLAC__bool copy)
 {
 	FLAC__StreamMetadata_VorbisComment *vc;
+	FLAC__StreamMetadata_VorbisComment_Entry temp;
 
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_VORBIS_COMMENT);
 	FLAC__ASSERT(comment_num <= object->data.vorbis_comment.num_comments);
 
-	if(!FLAC__format_vorbiscomment_entry_is_legal(entry.entry, entry.length))
+	if (!FLAC__format_vorbiscomment_entry_is_legal(entry.entry, entry.length))
 		return false;
 
 	vc = &object->data.vorbis_comment;
 
-	if(!FLAC__metadata_object_vorbiscomment_resize_comments(object, vc->num_comments+1))
+	if (!FLAC__metadata_object_vorbiscomment_resize_comments(object, vc->num_comments+1))
 		return false;
 
 	/* move all comments >= comment_num forward one space */
+	/* reuse newly added empty comment */
+	temp = vc->comments[vc->num_comments-1];
 	memmove(&vc->comments[comment_num+1], &vc->comments[comment_num], sizeof(FLAC__StreamMetadata_VorbisComment_Entry)*(vc->num_comments-1-comment_num));
-	vc->comments[comment_num].length = 0;
-	vc->comments[comment_num].entry = 0;
+	vc->comments[comment_num] = temp;
 
 	return FLAC__metadata_object_vorbiscomment_set_comment(object, comment_num, entry, copy);
 }
 
 FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_append_comment(FLAC__StreamMetadata *object, FLAC__StreamMetadata_VorbisComment_Entry entry, FLAC__bool copy)
 {
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_VORBIS_COMMENT);
 	return FLAC__metadata_object_vorbiscomment_insert_comment(object, object->data.vorbis_comment.num_comments, entry, copy);
 }
 
 FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_replace_comment(FLAC__StreamMetadata *object, FLAC__StreamMetadata_VorbisComment_Entry entry, FLAC__bool all, FLAC__bool copy)
 {
-	FLAC__ASSERT(0 != entry.entry && entry.length > 0);
+	FLAC__ASSERT(entry.entry != NULL && entry.length > 0);
 
-	if(!FLAC__format_vorbiscomment_entry_is_legal(entry.entry, entry.length))
+	if (!FLAC__format_vorbiscomment_entry_is_legal(entry.entry, entry.length))
 		return false;
 
 	{
@@ -1264,27 +1297,25 @@
 		size_t field_name_length;
 		const FLAC__byte *eq = (FLAC__byte*)memchr(entry.entry, '=', entry.length);
 
-		FLAC__ASSERT(0 != eq);
-
-		if(0 == eq)
+		if (eq == NULL)
 			return false; /* double protection */
 
 		field_name_length = eq-entry.entry;
 
 		i = vorbiscomment_find_entry_from_(object, 0, (const char *)entry.entry, field_name_length);
-		if(i >= 0) {
-			unsigned indx = (unsigned)i;
-			if(!FLAC__metadata_object_vorbiscomment_set_comment(object, indx, entry, copy))
+		if (i >= 0) {
+			uint32_t indx = (uint32_t)i;
+			if (!FLAC__metadata_object_vorbiscomment_set_comment(object, indx, entry, copy))
 				return false;
 			entry = object->data.vorbis_comment.comments[indx];
 			indx++; /* skip over replaced comment */
-			if(all && indx < object->data.vorbis_comment.num_comments) {
+			if (all && indx < object->data.vorbis_comment.num_comments) {
 				i = vorbiscomment_find_entry_from_(object, indx, (const char *)entry.entry, field_name_length);
-				while(i >= 0) {
-					indx = (unsigned)i;
-					if(!FLAC__metadata_object_vorbiscomment_delete_comment(object, indx))
+				while (i >= 0) {
+					indx = (uint32_t)i;
+					if (!FLAC__metadata_object_vorbiscomment_delete_comment(object, indx))
 						return false;
-					if(indx < object->data.vorbis_comment.num_comments)
+					if (indx < object->data.vorbis_comment.num_comments)
 						i = vorbiscomment_find_entry_from_(object, indx, (const char *)entry.entry, field_name_length);
 					else
 						i = -1;
@@ -1297,19 +1328,18 @@
 	}
 }
 
-FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_delete_comment(FLAC__StreamMetadata *object, unsigned comment_num)
+FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_delete_comment(FLAC__StreamMetadata *object, uint32_t comment_num)
 {
 	FLAC__StreamMetadata_VorbisComment *vc;
 
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_VORBIS_COMMENT);
 	FLAC__ASSERT(comment_num < object->data.vorbis_comment.num_comments);
 
 	vc = &object->data.vorbis_comment;
 
 	/* free the comment at comment_num */
-	if(0 != vc->comments[comment_num].entry)
-		free(vc->comments[comment_num].entry);
+	free(vc->comments[comment_num].entry);
 
 	/* move all comments > comment_num backward one space */
 	memmove(&vc->comments[comment_num], &vc->comments[comment_num+1], sizeof(FLAC__StreamMetadata_VorbisComment_Entry)*(vc->num_comments-comment_num-1));
@@ -1321,20 +1351,20 @@
 
 FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_entry_from_name_value_pair(FLAC__StreamMetadata_VorbisComment_Entry *entry, const char *field_name, const char *field_value)
 {
-	FLAC__ASSERT(0 != entry);
-	FLAC__ASSERT(0 != field_name);
-	FLAC__ASSERT(0 != field_value);
+	FLAC__ASSERT(entry != NULL);
+	FLAC__ASSERT(field_name != NULL);
+	FLAC__ASSERT(field_value != NULL);
 
-	if(!FLAC__format_vorbiscomment_entry_name_is_legal(field_name))
+	if (!FLAC__format_vorbiscomment_entry_name_is_legal(field_name))
 		return false;
-	if(!FLAC__format_vorbiscomment_entry_value_is_legal((const FLAC__byte *)field_value, (unsigned)(-1)))
+	if (!FLAC__format_vorbiscomment_entry_value_is_legal((const FLAC__byte *)field_value, (uint32_t)(-1)))
 		return false;
 
 	{
 		const size_t nn = strlen(field_name);
 		const size_t nv = strlen(field_value);
 		entry->length = nn + 1 /*=*/ + nv;
-		if(0 == (entry->entry = safe_malloc_add_4op_(nn, /*+*/1, /*+*/nv, /*+*/1)))
+		if ((entry->entry = safe_malloc_add_4op_(nn, /*+*/1, /*+*/nv, /*+*/1)) == NULL)
 			return false;
 		memcpy(entry->entry, field_name, nn);
 		entry->entry[nn] = '=';
@@ -1347,23 +1377,23 @@
 
 FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_entry_to_name_value_pair(const FLAC__StreamMetadata_VorbisComment_Entry entry, char **field_name, char **field_value)
 {
-	FLAC__ASSERT(0 != entry.entry && entry.length > 0);
-	FLAC__ASSERT(0 != field_name);
-	FLAC__ASSERT(0 != field_value);
+	FLAC__ASSERT(entry.entry != NULL && entry.length > 0);
+	FLAC__ASSERT(field_name != NULL);
+	FLAC__ASSERT(field_value != NULL);
 
-	if(!FLAC__format_vorbiscomment_entry_is_legal(entry.entry, entry.length))
+	if (!FLAC__format_vorbiscomment_entry_is_legal(entry.entry, entry.length))
 		return false;
 
 	{
 		const FLAC__byte *eq = (FLAC__byte*)memchr(entry.entry, '=', entry.length);
 		const size_t nn = eq-entry.entry;
 		const size_t nv = entry.length-nn-1; /* -1 for the '=' */
-		FLAC__ASSERT(0 != eq);
-		if(0 == eq)
+
+		if (eq == NULL)
 			return false; /* double protection */
-		if(0 == (*field_name = safe_malloc_add_2op_(nn, /*+*/1)))
+		if ((*field_name = safe_malloc_add_2op_(nn, /*+*/1)) == NULL)
 			return false;
-		if(0 == (*field_value = safe_malloc_add_2op_(nv, /*+*/1))) {
+		if ((*field_value = safe_malloc_add_2op_(nv, /*+*/1)) == NULL) {
 			free(*field_name);
 			return false;
 		}
@@ -1376,33 +1406,33 @@
 	return true;
 }
 
-FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_entry_matches(const FLAC__StreamMetadata_VorbisComment_Entry entry, const char *field_name, unsigned field_name_length)
+FLAC_API FLAC__bool FLAC__metadata_object_vorbiscomment_entry_matches(const FLAC__StreamMetadata_VorbisComment_Entry entry, const char *field_name, uint32_t field_name_length)
 {
-	FLAC__ASSERT(0 != entry.entry && entry.length > 0);
+	FLAC__ASSERT(entry.entry != NULL && entry.length > 0);
 	{
 		const FLAC__byte *eq = (FLAC__byte*)memchr(entry.entry, '=', entry.length);
-		return (0 != eq && (unsigned)(eq-entry.entry) == field_name_length && 0 == FLAC__STRNCASECMP(field_name, (const char *)entry.entry, field_name_length));
+		return (eq != NULL && (uint32_t)(eq-entry.entry) == field_name_length && FLAC__STRNCASECMP(field_name, (const char *)entry.entry, field_name_length) == 0);
 	}
 }
 
-FLAC_API int FLAC__metadata_object_vorbiscomment_find_entry_from(const FLAC__StreamMetadata *object, unsigned offset, const char *field_name)
+FLAC_API int FLAC__metadata_object_vorbiscomment_find_entry_from(const FLAC__StreamMetadata *object, uint32_t offset, const char *field_name)
 {
-	FLAC__ASSERT(0 != field_name);
+	FLAC__ASSERT(field_name != NULL);
 
 	return vorbiscomment_find_entry_from_(object, offset, field_name, strlen(field_name));
 }
 
 FLAC_API int FLAC__metadata_object_vorbiscomment_remove_entry_matching(FLAC__StreamMetadata *object, const char *field_name)
 {
-	const unsigned field_name_length = strlen(field_name);
-	unsigned i;
+	const uint32_t field_name_length = strlen(field_name);
+	uint32_t i;
 
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_VORBIS_COMMENT);
 
-	for(i = 0; i < object->data.vorbis_comment.num_comments; i++) {
-		if(FLAC__metadata_object_vorbiscomment_entry_matches(object->data.vorbis_comment.comments[i], field_name, field_name_length)) {
-			if(!FLAC__metadata_object_vorbiscomment_delete_comment(object, i))
+	for (i = 0; i < object->data.vorbis_comment.num_comments; i++) {
+		if (FLAC__metadata_object_vorbiscomment_entry_matches(object->data.vorbis_comment.comments[i], field_name, field_name_length)) {
+			if (!FLAC__metadata_object_vorbiscomment_delete_comment(object, i))
 				return -1;
 			else
 				return 1;
@@ -1415,18 +1445,18 @@
 FLAC_API int FLAC__metadata_object_vorbiscomment_remove_entries_matching(FLAC__StreamMetadata *object, const char *field_name)
 {
 	FLAC__bool ok = true;
-	unsigned matching = 0;
-	const unsigned field_name_length = strlen(field_name);
+	uint32_t matching = 0;
+	const uint32_t field_name_length = strlen(field_name);
 	int i;
 
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_VORBIS_COMMENT);
 
 	/* must delete from end to start otherwise it will interfere with our iteration */
-	for(i = (int)object->data.vorbis_comment.num_comments - 1; ok && i >= 0; i--) {
-		if(FLAC__metadata_object_vorbiscomment_entry_matches(object->data.vorbis_comment.comments[i], field_name, field_name_length)) {
+	for (i = (int)object->data.vorbis_comment.num_comments - 1; ok && i >= 0; i--) {
+		if (FLAC__metadata_object_vorbiscomment_entry_matches(object->data.vorbis_comment.comments[i], field_name, field_name_length)) {
 			matching++;
-			ok &= FLAC__metadata_object_vorbiscomment_delete_comment(object, (unsigned)i);
+			ok &= FLAC__metadata_object_vorbiscomment_delete_comment(object, (uint32_t)i);
 		}
 	}
 
@@ -1442,10 +1472,10 @@
 {
 	FLAC__StreamMetadata_CueSheet_Track *to;
 
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 
-	if(0 != (to = FLAC__metadata_object_cuesheet_track_new())) {
-		if(!copy_track_(to, object)) {
+	if ((to = FLAC__metadata_object_cuesheet_track_new()) != NULL) {
+		if (!copy_track_(to, object)) {
 			FLAC__metadata_object_cuesheet_track_delete(to);
 			return 0;
 		}
@@ -1456,9 +1486,9 @@
 
 void FLAC__metadata_object_cuesheet_track_delete_data(FLAC__StreamMetadata_CueSheet_Track *object)
 {
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 
-	if(0 != object->indices) {
+	if (object->indices != NULL) {
 		FLAC__ASSERT(object->num_indices > 0);
 		free(object->indices);
 	}
@@ -1470,20 +1500,20 @@
 	free(object);
 }
 
-FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_track_resize_indices(FLAC__StreamMetadata *object, unsigned track_num, unsigned new_num_indices)
+FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_track_resize_indices(FLAC__StreamMetadata *object, uint32_t track_num, uint32_t new_num_indices)
 {
 	FLAC__StreamMetadata_CueSheet_Track *track;
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_CUESHEET);
 	FLAC__ASSERT(track_num < object->data.cue_sheet.num_tracks);
 
 	track = &object->data.cue_sheet.tracks[track_num];
 
-	if(0 == track->indices) {
+	if (track->indices == NULL) {
 		FLAC__ASSERT(track->num_indices == 0);
-		if(0 == new_num_indices)
+		if (new_num_indices == 0)
 			return true;
-		else if(0 == (track->indices = cuesheet_track_index_array_new_(new_num_indices)))
+		else if ((track->indices = cuesheet_track_index_array_new_(new_num_indices)) == NULL)
 			return false;
 	}
 	else {
@@ -1491,20 +1521,25 @@
 		const size_t new_size = new_num_indices * sizeof(FLAC__StreamMetadata_CueSheet_Index);
 
 		/* overflow check */
-		if(new_num_indices > UINT32_MAX / sizeof(FLAC__StreamMetadata_CueSheet_Index))
+		if (new_num_indices > UINT32_MAX / sizeof(FLAC__StreamMetadata_CueSheet_Index))
 			return false;
 
 		FLAC__ASSERT(track->num_indices > 0);
 
-		if(new_size == 0) {
+		if (new_size == 0) {
 			free(track->indices);
 			track->indices = 0;
 		}
-		else if(0 == (track->indices = realloc(track->indices, new_size)))
-			return false;
+		else {
+			/* Leave track->indices untouched if realloc fails */
+			FLAC__StreamMetadata_CueSheet_Index *tmpptr;
+			if ((tmpptr = realloc(track->indices, new_size)) == NULL)
+				return false;
+			track->indices = tmpptr;
+		}
 
 		/* if growing, zero all the lengths/pointers of new elements */
-		if(new_size > old_size)
+		if (new_size > old_size)
 			memset(track->indices + track->num_indices, 0, new_size - old_size);
 	}
 
@@ -1514,18 +1549,18 @@
 	return true;
 }
 
-FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_track_insert_index(FLAC__StreamMetadata *object, unsigned track_num, unsigned index_num, FLAC__StreamMetadata_CueSheet_Index indx)
+FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_track_insert_index(FLAC__StreamMetadata *object, uint32_t track_num, uint32_t index_num, FLAC__StreamMetadata_CueSheet_Index indx)
 {
 	FLAC__StreamMetadata_CueSheet_Track *track;
 
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_CUESHEET);
 	FLAC__ASSERT(track_num < object->data.cue_sheet.num_tracks);
 	FLAC__ASSERT(index_num <= object->data.cue_sheet.tracks[track_num].num_indices);
 
 	track = &object->data.cue_sheet.tracks[track_num];
 
-	if(!FLAC__metadata_object_cuesheet_track_resize_indices(object, track_num, track->num_indices+1))
+	if (!FLAC__metadata_object_cuesheet_track_resize_indices(object, track_num, track->num_indices+1))
 		return false;
 
 	/* move all indices >= index_num forward one space */
@@ -1536,18 +1571,18 @@
 	return true;
 }
 
-FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_track_insert_blank_index(FLAC__StreamMetadata *object, unsigned track_num, unsigned index_num)
+FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_track_insert_blank_index(FLAC__StreamMetadata *object, uint32_t track_num, uint32_t index_num)
 {
 	FLAC__StreamMetadata_CueSheet_Index indx;
 	memset(&indx, 0, sizeof(indx));
 	return FLAC__metadata_object_cuesheet_track_insert_index(object, track_num, index_num, indx);
 }
 
-FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_track_delete_index(FLAC__StreamMetadata *object, unsigned track_num, unsigned index_num)
+FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_track_delete_index(FLAC__StreamMetadata *object, uint32_t track_num, uint32_t index_num)
 {
 	FLAC__StreamMetadata_CueSheet_Track *track;
 
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_CUESHEET);
 	FLAC__ASSERT(track_num < object->data.cue_sheet.num_tracks);
 	FLAC__ASSERT(index_num < object->data.cue_sheet.tracks[track_num].num_indices);
@@ -1562,16 +1597,16 @@
 	return true;
 }
 
-FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_resize_tracks(FLAC__StreamMetadata *object, unsigned new_num_tracks)
+FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_resize_tracks(FLAC__StreamMetadata *object, uint32_t new_num_tracks)
 {
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_CUESHEET);
 
-	if(0 == object->data.cue_sheet.tracks) {
+	if (object->data.cue_sheet.tracks == NULL) {
 		FLAC__ASSERT(object->data.cue_sheet.num_tracks == 0);
-		if(0 == new_num_tracks)
+		if (new_num_tracks == 0)
 			return true;
-		else if(0 == (object->data.cue_sheet.tracks = cuesheet_track_array_new_(new_num_tracks)))
+		else if ((object->data.cue_sheet.tracks = cuesheet_track_array_new_(new_num_tracks)) == NULL)
 			return false;
 	}
 	else {
@@ -1579,28 +1614,32 @@
 		const size_t new_size = new_num_tracks * sizeof(FLAC__StreamMetadata_CueSheet_Track);
 
 		/* overflow check */
-		if(new_num_tracks > UINT32_MAX / sizeof(FLAC__StreamMetadata_CueSheet_Track))
+		if (new_num_tracks > UINT32_MAX / sizeof(FLAC__StreamMetadata_CueSheet_Track))
 			return false;
 
 		FLAC__ASSERT(object->data.cue_sheet.num_tracks > 0);
 
 		/* if shrinking, free the truncated entries */
-		if(new_num_tracks < object->data.cue_sheet.num_tracks) {
-			unsigned i;
-			for(i = new_num_tracks; i < object->data.cue_sheet.num_tracks; i++)
-				if(0 != object->data.cue_sheet.tracks[i].indices)
-					free(object->data.cue_sheet.tracks[i].indices);
+		if (new_num_tracks < object->data.cue_sheet.num_tracks) {
+			uint32_t i;
+			for (i = new_num_tracks; i < object->data.cue_sheet.num_tracks; i++)
+				free(object->data.cue_sheet.tracks[i].indices);
 		}
 
-		if(new_size == 0) {
+		if (new_size == 0) {
 			free(object->data.cue_sheet.tracks);
 			object->data.cue_sheet.tracks = 0;
 		}
-		else if(0 == (object->data.cue_sheet.tracks = realloc(object->data.cue_sheet.tracks, new_size)))
-			return false;
+		else {
+			/* Leave object->data.cue_sheet.tracks untouched if realloc fails */
+			FLAC__StreamMetadata_CueSheet_Track *tmpptr;
+			if ((tmpptr = realloc(object->data.cue_sheet.tracks, new_size)) == NULL)
+				return false;
+			object->data.cue_sheet.tracks = tmpptr;
+		}
 
 		/* if growing, zero all the lengths/pointers of new elements */
-		if(new_size > old_size)
+		if (new_size > old_size)
 			memset(object->data.cue_sheet.tracks + object->data.cue_sheet.num_tracks, 0, new_size - old_size);
 	}
 
@@ -1610,25 +1649,25 @@
 	return true;
 }
 
-FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_set_track(FLAC__StreamMetadata *object, unsigned track_num, FLAC__StreamMetadata_CueSheet_Track *track, FLAC__bool copy)
+FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_set_track(FLAC__StreamMetadata *object, uint32_t track_num, FLAC__StreamMetadata_CueSheet_Track *track, FLAC__bool copy)
 {
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(track_num < object->data.cue_sheet.num_tracks);
 
 	return cuesheet_set_track_(object, object->data.cue_sheet.tracks + track_num, track, copy);
 }
 
-FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_insert_track(FLAC__StreamMetadata *object, unsigned track_num, FLAC__StreamMetadata_CueSheet_Track *track, FLAC__bool copy)
+FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_insert_track(FLAC__StreamMetadata *object, uint32_t track_num, FLAC__StreamMetadata_CueSheet_Track *track, FLAC__bool copy)
 {
 	FLAC__StreamMetadata_CueSheet *cs;
 
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_CUESHEET);
 	FLAC__ASSERT(track_num <= object->data.cue_sheet.num_tracks);
 
 	cs = &object->data.cue_sheet;
 
-	if(!FLAC__metadata_object_cuesheet_resize_tracks(object, cs->num_tracks+1))
+	if (!FLAC__metadata_object_cuesheet_resize_tracks(object, cs->num_tracks+1))
 		return false;
 
 	/* move all tracks >= track_num forward one space */
@@ -1639,26 +1678,25 @@
 	return FLAC__metadata_object_cuesheet_set_track(object, track_num, track, copy);
 }
 
-FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_insert_blank_track(FLAC__StreamMetadata *object, unsigned track_num)
+FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_insert_blank_track(FLAC__StreamMetadata *object, uint32_t track_num)
 {
 	FLAC__StreamMetadata_CueSheet_Track track;
 	memset(&track, 0, sizeof(track));
 	return FLAC__metadata_object_cuesheet_insert_track(object, track_num, &track, /*copy=*/false);
 }
 
-FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_delete_track(FLAC__StreamMetadata *object, unsigned track_num)
+FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_delete_track(FLAC__StreamMetadata *object, uint32_t track_num)
 {
 	FLAC__StreamMetadata_CueSheet *cs;
 
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_CUESHEET);
 	FLAC__ASSERT(track_num < object->data.cue_sheet.num_tracks);
 
 	cs = &object->data.cue_sheet;
 
 	/* free the track at track_num */
-	if(0 != cs->tracks[track_num].indices)
-		free(cs->tracks[track_num].indices);
+	free(cs->tracks[track_num].indices);
 
 	/* move all tracks > track_num backward one space */
 	memmove(&cs->tracks[track_num], &cs->tracks[track_num+1], sizeof(FLAC__StreamMetadata_CueSheet_Track)*(cs->num_tracks-track_num-1));
@@ -1670,13 +1708,13 @@
 
 FLAC_API FLAC__bool FLAC__metadata_object_cuesheet_is_legal(const FLAC__StreamMetadata *object, FLAC__bool check_cd_da_subset, const char **violation)
 {
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_CUESHEET);
 
 	return FLAC__format_cuesheet_is_legal(&object->data.cue_sheet, check_cd_da_subset, violation);
 }
 
-static FLAC__uint64 get_index_01_offset_(const FLAC__StreamMetadata_CueSheet *cs, unsigned track)
+static FLAC__uint64 get_index_01_offset_(const FLAC__StreamMetadata_CueSheet *cs, uint32_t track)
 {
 	if (track >= (cs->num_tracks-1) || cs->tracks[track].num_indices < 1)
 		return 0;
@@ -1705,7 +1743,7 @@
 {
 	const FLAC__StreamMetadata_CueSheet *cs;
 
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_CUESHEET);
 
 	cs = &object->data.cue_sheet;
@@ -1728,27 +1766,26 @@
 	char *old;
 	size_t old_length, new_length;
 
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_PICTURE);
-	FLAC__ASSERT(0 != mime_type);
+	FLAC__ASSERT(mime_type != NULL);
 
 	old = object->data.picture.mime_type;
 	old_length = old? strlen(old) : 0;
 	new_length = strlen(mime_type);
 
 	/* do the copy first so that if we fail we leave the object untouched */
-	if(copy) {
-		if(new_length >= SIZE_MAX) /* overflow check */
+	if (copy) {
+		if (new_length >= SIZE_MAX) /* overflow check */
 			return false;
-		if(!copy_bytes_((FLAC__byte**)(&object->data.picture.mime_type), (FLAC__byte*)mime_type, new_length+1))
+		if (!copy_bytes_((FLAC__byte**)(&object->data.picture.mime_type), (FLAC__byte*)mime_type, new_length+1))
 			return false;
 	}
 	else {
 		object->data.picture.mime_type = mime_type;
 	}
 
-	if(0 != old)
-		free(old);
+	free(old);
 
 	object->length -= old_length;
 	object->length += new_length;
@@ -1760,27 +1797,26 @@
 	FLAC__byte *old;
 	size_t old_length, new_length;
 
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_PICTURE);
-	FLAC__ASSERT(0 != description);
+	FLAC__ASSERT(description != NULL);
 
 	old = object->data.picture.description;
 	old_length = old? strlen((const char *)old) : 0;
 	new_length = strlen((const char *)description);
 
 	/* do the copy first so that if we fail we leave the object untouched */
-	if(copy) {
-		if(new_length >= SIZE_MAX) /* overflow check */
+	if (copy) {
+		if (new_length >= SIZE_MAX) /* overflow check */
 			return false;
-		if(!copy_bytes_(&object->data.picture.description, description, new_length+1))
+		if (!copy_bytes_(&object->data.picture.description, description, new_length+1))
 			return false;
 	}
 	else {
 		object->data.picture.description = description;
 	}
 
-	if(0 != old)
-		free(old);
+	free(old);
 
 	object->length -= old_length;
 	object->length += new_length;
@@ -1791,23 +1827,22 @@
 {
 	FLAC__byte *old;
 
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_PICTURE);
-	FLAC__ASSERT((0 != data && length > 0) || (0 == data && length == 0 && copy == false));
+	FLAC__ASSERT((data != NULL && length > 0) || (data == NULL && length == 0 && copy == false));
 
 	old = object->data.picture.data;
 
 	/* do the copy first so that if we fail we leave the object untouched */
-	if(copy) {
-		if(!copy_bytes_(&object->data.picture.data, data, length))
+	if (copy) {
+		if (!copy_bytes_(&object->data.picture.data, data, length))
 			return false;
 	}
 	else {
 		object->data.picture.data = data;
 	}
 
-	if(0 != old)
-		free(old);
+	free(old);
 
 	object->length -= object->data.picture.data_length;
 	object->data.picture.data_length = length;
@@ -1817,7 +1852,7 @@
 
 FLAC_API FLAC__bool FLAC__metadata_object_picture_is_legal(const FLAC__StreamMetadata *object, const char **violation)
 {
-	FLAC__ASSERT(0 != object);
+	FLAC__ASSERT(object != NULL);
 	FLAC__ASSERT(object->type == FLAC__METADATA_TYPE_PICTURE);
 
 	return FLAC__format_picture_is_legal(&object->data.picture, violation);

diff --git a/src/libFLAC/stream_decoder.c b/src/libFLAC/stream_decoder.c
index 6632d31..09bdada 100644
--- a/src/libFLAC/stream_decoder.c
+++ b/src/libFLAC/stream_decoder.c

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -37,15 +37,14 @@
 #include <stdio.h>
 #include <stdlib.h> /* for malloc() */
 #include <string.h> /* for memset/memcpy() */
-#include <sys/stat.h> /* for stat() */
 #include <sys/types.h> /* for off_t */
+#include <sys/stat.h>  /* for stat() */
 #include "share/compat.h"
 #include "FLAC/assert.h"
 #include "share/alloc.h"
 #include "protected/stream_decoder.h"
 #include "private/bitreader.h"
 #include "private/bitmath.h"
-#include "private/cpu.h"
 #include "private/crc.h"
 #include "private/fixed.h"
 #include "private/format.h"
@@ -56,13 +55,7 @@
 
 
 /* technically this should be in an "export.c" but this is convenient enough */
-FLAC_API int FLAC_API_SUPPORTS_OGG_FLAC =
-#if FLAC__HAS_OGG
-	1
-#else
-	0
-#endif
-;
+FLAC_API int FLAC_API_SUPPORTS_OGG_FLAC = FLAC__HAS_OGG;
 
 
 /***********************************************************************
@@ -81,26 +74,27 @@
 
 static void set_defaults_(FLAC__StreamDecoder *decoder);
 static FILE *get_binary_stdin_(void);
-static FLAC__bool allocate_output_(FLAC__StreamDecoder *decoder, unsigned size, unsigned channels);
+static FLAC__bool allocate_output_(FLAC__StreamDecoder *decoder, uint32_t size, uint32_t channels, uint32_t bps);
 static FLAC__bool has_id_filtered_(FLAC__StreamDecoder *decoder, FLAC__byte *id);
 static FLAC__bool find_metadata_(FLAC__StreamDecoder *decoder);
 static FLAC__bool read_metadata_(FLAC__StreamDecoder *decoder);
-static FLAC__bool read_metadata_streaminfo_(FLAC__StreamDecoder *decoder, FLAC__bool is_last, unsigned length);
-static FLAC__bool read_metadata_seektable_(FLAC__StreamDecoder *decoder, FLAC__bool is_last, unsigned length);
-static FLAC__bool read_metadata_vorbiscomment_(FLAC__StreamDecoder *decoder, FLAC__StreamMetadata_VorbisComment *obj, unsigned length);
+static FLAC__bool read_metadata_streaminfo_(FLAC__StreamDecoder *decoder, FLAC__bool is_last, uint32_t length);
+static FLAC__bool read_metadata_seektable_(FLAC__StreamDecoder *decoder, FLAC__bool is_last, uint32_t length);
+static FLAC__bool read_metadata_vorbiscomment_(FLAC__StreamDecoder *decoder, FLAC__StreamMetadata_VorbisComment *obj, uint32_t length);
 static FLAC__bool read_metadata_cuesheet_(FLAC__StreamDecoder *decoder, FLAC__StreamMetadata_CueSheet *obj);
 static FLAC__bool read_metadata_picture_(FLAC__StreamDecoder *decoder, FLAC__StreamMetadata_Picture *obj);
 static FLAC__bool skip_id3v2_tag_(FLAC__StreamDecoder *decoder);
 static FLAC__bool frame_sync_(FLAC__StreamDecoder *decoder);
 static FLAC__bool read_frame_(FLAC__StreamDecoder *decoder, FLAC__bool *got_a_frame, FLAC__bool do_full_decode);
 static FLAC__bool read_frame_header_(FLAC__StreamDecoder *decoder);
-static FLAC__bool read_subframe_(FLAC__StreamDecoder *decoder, unsigned channel, unsigned bps, FLAC__bool do_full_decode);
-static FLAC__bool read_subframe_constant_(FLAC__StreamDecoder *decoder, unsigned channel, unsigned bps, FLAC__bool do_full_decode);
-static FLAC__bool read_subframe_fixed_(FLAC__StreamDecoder *decoder, unsigned channel, unsigned bps, const unsigned order, FLAC__bool do_full_decode);
-static FLAC__bool read_subframe_lpc_(FLAC__StreamDecoder *decoder, unsigned channel, unsigned bps, const unsigned order, FLAC__bool do_full_decode);
-static FLAC__bool read_subframe_verbatim_(FLAC__StreamDecoder *decoder, unsigned channel, unsigned bps, FLAC__bool do_full_decode);
-static FLAC__bool read_residual_partitioned_rice_(FLAC__StreamDecoder *decoder, unsigned predictor_order, unsigned partition_order, FLAC__EntropyCodingMethod_PartitionedRiceContents *partitioned_rice_contents, FLAC__int32 *residual, FLAC__bool is_extended);
+static FLAC__bool read_subframe_(FLAC__StreamDecoder *decoder, uint32_t channel, uint32_t bps, FLAC__bool do_full_decode);
+static FLAC__bool read_subframe_constant_(FLAC__StreamDecoder *decoder, uint32_t channel, uint32_t bps, FLAC__bool do_full_decode);
+static FLAC__bool read_subframe_fixed_(FLAC__StreamDecoder *decoder, uint32_t channel, uint32_t bps, const uint32_t order, FLAC__bool do_full_decode);
+static FLAC__bool read_subframe_lpc_(FLAC__StreamDecoder *decoder, uint32_t channel, uint32_t bps, const uint32_t order, FLAC__bool do_full_decode);
+static FLAC__bool read_subframe_verbatim_(FLAC__StreamDecoder *decoder, uint32_t channel, uint32_t bps, FLAC__bool do_full_decode);
+static FLAC__bool read_residual_partitioned_rice_(FLAC__StreamDecoder *decoder, uint32_t predictor_order, uint32_t partition_order, FLAC__EntropyCodingMethod_PartitionedRiceContents *partitioned_rice_contents, FLAC__int32 *residual, FLAC__bool is_extended);
 static FLAC__bool read_zero_padding_(FLAC__StreamDecoder *decoder);
+static void       undo_channel_coding(FLAC__StreamDecoder *decoder);
 static FLAC__bool read_callback_(FLAC__byte buffer[], size_t *bytes, void *client_data);
 #if FLAC__HAS_OGG
 static FLAC__StreamDecoderReadStatus read_callback_ogg_aspect_(const FLAC__StreamDecoder *decoder, FLAC__byte buffer[], size_t *bytes);
@@ -125,9 +119,7 @@
  ***********************************************************************/
 
 typedef struct FLAC__StreamDecoderPrivate {
-#if FLAC__HAS_OGG
 	FLAC__bool is_ogg;
-#endif
 	FLAC__StreamDecoderReadCallback read_callback;
 	FLAC__StreamDecoderSeekCallback seek_callback;
 	FLAC__StreamDecoderTellCallback tell_callback;
@@ -136,19 +128,15 @@
 	FLAC__StreamDecoderWriteCallback write_callback;
 	FLAC__StreamDecoderMetadataCallback metadata_callback;
 	FLAC__StreamDecoderErrorCallback error_callback;
-	/* generic 32-bit datapath: */
-	void (*local_lpc_restore_signal)(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
-	/* generic 64-bit datapath: */
-	void (*local_lpc_restore_signal_64bit)(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
-	/* for use when the signal is <= 16 bits-per-sample, or <= 15 bits-per-sample on a side channel (which requires 1 extra bit): */
-	void (*local_lpc_restore_signal_16bit)(const FLAC__int32 residual[], unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 data[]);
 	void *client_data;
 	FILE *file; /* only used if FLAC__stream_decoder_init_file()/FLAC__stream_decoder_init_file() called, else NULL */
 	FLAC__BitReader *input;
 	FLAC__int32 *output[FLAC__MAX_CHANNELS];
 	FLAC__int32 *residual[FLAC__MAX_CHANNELS]; /* WATCHOUT: these are the aligned pointers; the real pointers that should be free()'d are residual_unaligned[] below */
+	FLAC__int64 *side_subframe;
+	FLAC__bool side_subframe_in_use;
 	FLAC__EntropyCodingMethod_PartitionedRiceContents partitioned_rice_contents[FLAC__MAX_CHANNELS];
-	unsigned output_capacity, output_channels;
+	uint32_t output_capacity, output_channels;
 	FLAC__uint32 fixed_block_size, next_fixed_block_size;
 	FLAC__uint64 samples_decoded;
 	FLAC__bool has_stream_info, has_seek_table;
@@ -159,7 +147,6 @@
 	size_t metadata_filter_ids_count, metadata_filter_ids_capacity; /* units for both are IDs, not bytes */
 	FLAC__Frame frame;
 	FLAC__bool cached; /* true if there is a byte in lookahead */
-	FLAC__CPUInfo cpuinfo;
 	FLAC__byte header_warmup[2]; /* contains the sync code and reserved bits */
 	FLAC__byte lookahead; /* temp storage when we need to look ahead one byte in the stream */
 	/* unaligned (original) pointers to allocated data */
@@ -170,13 +157,13 @@
 	FLAC__MD5Context md5context;
 	FLAC__byte computed_md5sum[16]; /* this is the sum we computed from the decoded data */
 	/* (the rest of these are only used for seeking) */
-	FLAC__Frame last_frame; /* holds the info of the last frame we seeked to */
+	FLAC__Frame last_frame; /* holds the info of the last frame we decoded or seeked to */
+	FLAC__bool last_frame_is_set;
 	FLAC__uint64 first_frame_offset; /* hint to the seek routine of where in the stream the first audio frame starts */
+	FLAC__uint64 last_seen_framesync; /* if tell callback works, the location of the last seen frame sync code, to rewind to if needed */
 	FLAC__uint64 target_sample;
-	unsigned unparseable_frame_count; /* used to tell whether we're decoding a future version of FLAC or just got a bad sync */
-#if FLAC__HAS_OGG
+	uint32_t unparseable_frame_count; /* used to tell whether we're decoding a future version of FLAC or just got a bad sync */
 	FLAC__bool got_a_frame; /* hack needed in Ogg FLAC seek routine to check when process_single() actually writes a frame */
-#endif
 } FLAC__StreamDecoderPrivate;
 
 /***********************************************************************
@@ -240,7 +227,8 @@
 	"FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC",
 	"FLAC__STREAM_DECODER_ERROR_STATUS_BAD_HEADER",
 	"FLAC__STREAM_DECODER_ERROR_STATUS_FRAME_CRC_MISMATCH",
-	"FLAC__STREAM_DECODER_ERROR_STATUS_UNPARSEABLE_STREAM"
+	"FLAC__STREAM_DECODER_ERROR_STATUS_UNPARSEABLE_STREAM",
+	"FLAC__STREAM_DECODER_ERROR_STATUS_BAD_METADATA"
 };
 
 /***********************************************************************
@@ -251,7 +239,7 @@
 FLAC_API FLAC__StreamDecoder *FLAC__stream_decoder_new(void)
 {
 	FLAC__StreamDecoder *decoder;
-	unsigned i;
+	uint32_t i;
 
 	FLAC__ASSERT(sizeof(int) >= 4); /* we want to die right away if this is not true */
 
@@ -295,6 +283,8 @@
 		decoder->private_->residual_unaligned[i] = decoder->private_->residual[i] = 0;
 	}
 
+	decoder->private_->side_subframe = 0;
+
 	decoder->private_->output_capacity = 0;
 	decoder->private_->output_channels = 0;
 	decoder->private_->has_seek_table = false;
@@ -313,7 +303,7 @@
 
 FLAC_API void FLAC__stream_decoder_delete(FLAC__StreamDecoder *decoder)
 {
-	unsigned i;
+	uint32_t i;
 
 	if (decoder == NULL)
 		return ;
@@ -362,10 +352,8 @@
 	if(decoder->protected_->state != FLAC__STREAM_DECODER_UNINITIALIZED)
 		return FLAC__STREAM_DECODER_INIT_STATUS_ALREADY_INITIALIZED;
 
-#if !FLAC__HAS_OGG
-	if(is_ogg)
+	if(FLAC__HAS_OGG == 0 && is_ogg)
 		return FLAC__STREAM_DECODER_INIT_STATUS_UNSUPPORTED_CONTAINER;
-#endif
 
 	if(
 		0 == read_callback ||
@@ -381,49 +369,6 @@
 		return decoder->protected_->initstate = FLAC__STREAM_DECODER_INIT_STATUS_ERROR_OPENING_FILE;
 #endif
 
-	/*
-	 * get the CPU info and set the function pointers
-	 */
-	FLAC__cpu_info(&decoder->private_->cpuinfo);
-	/* first default to the non-asm routines */
-	decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal;
-	decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide;
-	decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal;
-	/* now override with asm where appropriate */
-#ifndef FLAC__NO_ASM
-	if(decoder->private_->cpuinfo.use_asm) {
-#ifdef FLAC__CPU_IA32
-		FLAC__ASSERT(decoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_IA32);
-#ifdef FLAC__HAS_NASM
-		decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide_asm_ia32; /* OPT_IA32: was really necessary for GCC < 4.9 */
-		if(decoder->private_->cpuinfo.ia32.mmx) {
-			decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal_asm_ia32;
-			decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_asm_ia32_mmx;
-		}
-		else {
-			decoder->private_->local_lpc_restore_signal = FLAC__lpc_restore_signal_asm_ia32;
-			decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_asm_ia32;
-		}
-#endif
-#ifdef FLAC__HAS_X86INTRIN
-# if defined FLAC__SSE2_SUPPORTED && !defined FLAC__HAS_NASM /* OPT_SSE: not better than MMX asm */
-		if(decoder->private_->cpuinfo.ia32.sse2) {
-			decoder->private_->local_lpc_restore_signal_16bit = FLAC__lpc_restore_signal_16_intrin_sse2;
-		}
-# endif
-# if defined FLAC__SSE4_1_SUPPORTED
-		if(decoder->private_->cpuinfo.ia32.sse41) {
-			decoder->private_->local_lpc_restore_signal_64bit = FLAC__lpc_restore_signal_wide_intrin_sse41;
-		}
-# endif
-#endif
-#elif defined FLAC__CPU_X86_64
-		FLAC__ASSERT(decoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_X86_64);
-		/* No useful SSE optimizations yet */
-#endif
-	}
-#endif
-
 	/* from here on, errors are fatal */
 
 	if(!FLAC__bitreader_init(decoder->private_->input, read_callback_, decoder)) {
@@ -641,7 +586,7 @@
 FLAC_API FLAC__bool FLAC__stream_decoder_finish(FLAC__StreamDecoder *decoder)
 {
 	FLAC__bool md5_failed = false;
-	unsigned i;
+	uint32_t i;
 
 	FLAC__ASSERT(0 != decoder);
 	FLAC__ASSERT(0 != decoder->private_);
@@ -655,18 +600,17 @@
 	 */
 	FLAC__MD5Final(decoder->private_->computed_md5sum, &decoder->private_->md5context);
 
-	if(decoder->private_->has_seek_table && 0 != decoder->private_->seek_table.data.seek_table.points) {
-		free(decoder->private_->seek_table.data.seek_table.points);
-		decoder->private_->seek_table.data.seek_table.points = 0;
-		decoder->private_->has_seek_table = false;
-	}
+	free(decoder->private_->seek_table.data.seek_table.points);
+	decoder->private_->seek_table.data.seek_table.points = 0;
+	decoder->private_->has_seek_table = false;
+
 	FLAC__bitreader_free(decoder->private_->input);
 	for(i = 0; i < FLAC__MAX_CHANNELS; i++) {
 		/* WATCHOUT:
-		 * FLAC__lpc_restore_signal_asm_ia32_mmx() requires that the
-		 * output arrays have a buffer of up to 3 zeroes in front
-		 * (at negative indices) for alignment purposes; we use 4
-		 * to keep the data well-aligned.
+		 * FLAC__lpc_restore_signal_asm_ia32_mmx() and ..._intrin_sseN()
+		 * require that the output arrays have a buffer of up to 3 zeroes
+		 * in front (at negative indices) for alignment purposes;
+		 * we use 4 to keep the data well-aligned.
 		 */
 		if(0 != decoder->private_->output[i]) {
 			free(decoder->private_->output[i]-4);
@@ -677,6 +621,10 @@
 			decoder->private_->residual_unaligned[i] = decoder->private_->residual[i] = 0;
 		}
 	}
+	if(0 != decoder->private_->side_subframe) {
+		free(decoder->private_->side_subframe);
+		decoder->private_->side_subframe = 0;
+	}
 	decoder->private_->output_capacity = 0;
 	decoder->private_->output_channels = 0;
 
@@ -736,9 +684,9 @@
 	FLAC__ASSERT(0 != decoder);
 	FLAC__ASSERT(0 != decoder->private_);
 	FLAC__ASSERT(0 != decoder->protected_);
-	FLAC__ASSERT((unsigned)type <= FLAC__MAX_METADATA_TYPE_CODE);
+	FLAC__ASSERT((uint32_t)type <= FLAC__MAX_METADATA_TYPE_CODE);
 	/* double protection */
-	if((unsigned)type > FLAC__MAX_METADATA_TYPE_CODE)
+	if((uint32_t)type > FLAC__MAX_METADATA_TYPE_CODE)
 		return false;
 	if(decoder->protected_->state != FLAC__STREAM_DECODER_UNINITIALIZED)
 		return false;
@@ -778,7 +726,7 @@
 
 FLAC_API FLAC__bool FLAC__stream_decoder_set_metadata_respond_all(FLAC__StreamDecoder *decoder)
 {
-	unsigned i;
+	uint32_t i;
 	FLAC__ASSERT(0 != decoder);
 	FLAC__ASSERT(0 != decoder->private_);
 	FLAC__ASSERT(0 != decoder->protected_);
@@ -795,9 +743,9 @@
 	FLAC__ASSERT(0 != decoder);
 	FLAC__ASSERT(0 != decoder->private_);
 	FLAC__ASSERT(0 != decoder->protected_);
-	FLAC__ASSERT((unsigned)type <= FLAC__MAX_METADATA_TYPE_CODE);
+	FLAC__ASSERT((uint32_t)type <= FLAC__MAX_METADATA_TYPE_CODE);
 	/* double protection */
-	if((unsigned)type > FLAC__MAX_METADATA_TYPE_CODE)
+	if((uint32_t)type > FLAC__MAX_METADATA_TYPE_CODE)
 		return false;
 	if(decoder->protected_->state != FLAC__STREAM_DECODER_UNINITIALIZED)
 		return false;
@@ -873,7 +821,7 @@
 	return decoder->private_->has_stream_info? decoder->private_->stream_info.data.stream_info.total_samples : 0;
 }
 
-FLAC_API unsigned FLAC__stream_decoder_get_channels(const FLAC__StreamDecoder *decoder)
+FLAC_API uint32_t FLAC__stream_decoder_get_channels(const FLAC__StreamDecoder *decoder)
 {
 	FLAC__ASSERT(0 != decoder);
 	FLAC__ASSERT(0 != decoder->protected_);
@@ -887,21 +835,21 @@
 	return decoder->protected_->channel_assignment;
 }
 
-FLAC_API unsigned FLAC__stream_decoder_get_bits_per_sample(const FLAC__StreamDecoder *decoder)
+FLAC_API uint32_t FLAC__stream_decoder_get_bits_per_sample(const FLAC__StreamDecoder *decoder)
 {
 	FLAC__ASSERT(0 != decoder);
 	FLAC__ASSERT(0 != decoder->protected_);
 	return decoder->protected_->bits_per_sample;
 }
 
-FLAC_API unsigned FLAC__stream_decoder_get_sample_rate(const FLAC__StreamDecoder *decoder)
+FLAC_API uint32_t FLAC__stream_decoder_get_sample_rate(const FLAC__StreamDecoder *decoder)
 {
 	FLAC__ASSERT(0 != decoder);
 	FLAC__ASSERT(0 != decoder->protected_);
 	return decoder->protected_->sample_rate;
 }
 
-FLAC_API unsigned FLAC__stream_decoder_get_blocksize(const FLAC__StreamDecoder *decoder)
+FLAC_API uint32_t FLAC__stream_decoder_get_blocksize(const FLAC__StreamDecoder *decoder)
 {
 	FLAC__ASSERT(0 != decoder);
 	FLAC__ASSERT(0 != decoder->protected_);
@@ -914,10 +862,9 @@
 	FLAC__ASSERT(0 != decoder->private_);
 	FLAC__ASSERT(0 != position);
 
-#if FLAC__HAS_OGG
-	if(decoder->private_->is_ogg)
+	if(FLAC__HAS_OGG && decoder->private_->is_ogg)
 		return false;
-#endif
+
 	if(0 == decoder->private_->tell_callback)
 		return false;
 	if(decoder->private_->tell_callback(decoder, position, decoder->private_->client_data) != FLAC__STREAM_DECODER_TELL_STATUS_OK)
@@ -930,14 +877,24 @@
 	return true;
 }
 
+FLAC_API const void *FLAC__stream_decoder_get_client_data(FLAC__StreamDecoder *decoder)
+{ /* Accessor: hands back the client_data pointer held in the decoder's private state. NOTE(review): no FLAC__ASSERT on decoder or decoder->private_ here, so the caller must pass a valid decoder. */
+	return decoder->private_->client_data;
+}
+
 FLAC_API FLAC__bool FLAC__stream_decoder_flush(FLAC__StreamDecoder *decoder)
 {
 	FLAC__ASSERT(0 != decoder);
 	FLAC__ASSERT(0 != decoder->private_);
 	FLAC__ASSERT(0 != decoder->protected_);
 
+	if(!decoder->private_->internal_reset_hack && decoder->protected_->state == FLAC__STREAM_DECODER_UNINITIALIZED)
+		return false;
+
 	decoder->private_->samples_decoded = 0;
 	decoder->private_->do_md5_checking = false;
+	decoder->private_->last_seen_framesync = 0;
+	decoder->private_->last_frame_is_set = false;
 
 #if FLAC__HAS_OGG
 	if(decoder->private_->is_ogg)
@@ -981,17 +938,15 @@
 		if(decoder->private_->seek_callback && decoder->private_->seek_callback(decoder, 0, decoder->private_->client_data) == FLAC__STREAM_DECODER_SEEK_STATUS_ERROR)
 			return false; /* seekable and seek fails, reset fails */
 	}
-	else
-		decoder->private_->internal_reset_hack = false;
 
 	decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_METADATA;
 
 	decoder->private_->has_stream_info = false;
-	if(decoder->private_->has_seek_table && 0 != decoder->private_->seek_table.data.seek_table.points) {
-		free(decoder->private_->seek_table.data.seek_table.points);
-		decoder->private_->seek_table.data.seek_table.points = 0;
-		decoder->private_->has_seek_table = false;
-	}
+
+	free(decoder->private_->seek_table.data.seek_table.points);
+	decoder->private_->seek_table.data.seek_table.points = 0;
+	decoder->private_->has_seek_table = false;
+
 	decoder->private_->do_md5_checking = decoder->protected_->md5_checking;
 	/*
 	 * This goes in reset() and not flush() because according to the spec, a
@@ -1005,10 +960,19 @@
 	 * FLAC__stream_decoder_finish() to make sure things are always cleaned up
 	 * properly.
 	 */
+	if(!decoder->private_->internal_reset_hack) {
+		/* Only finish MD5 context when it has been initialized
+		 * (i.e. when internal_reset_hack is not set) */
+		FLAC__MD5Final(decoder->private_->computed_md5sum, &decoder->private_->md5context);
+	}
+	else
+		decoder->private_->internal_reset_hack = false;
 	FLAC__MD5Init(&decoder->private_->md5context);
 
 	decoder->private_->first_frame_offset = 0;
 	decoder->private_->unparseable_frame_count = 0;
+	decoder->private_->last_seen_framesync = 0;
+	decoder->private_->last_frame_is_set = false;
 
 	return true;
 }
@@ -1215,7 +1179,7 @@
  *
  ***********************************************************************/
 
-unsigned FLAC__stream_decoder_get_input_bytes_unconsumed(const FLAC__StreamDecoder *decoder)
+uint32_t FLAC__stream_decoder_get_input_bytes_unconsumed(const FLAC__StreamDecoder *decoder)
 {
 	FLAC__ASSERT(0 != decoder);
 	FLAC__ASSERT(FLAC__bitreader_is_consumed_byte_aligned(decoder->private_->input));
@@ -1231,9 +1195,7 @@
 
 void set_defaults_(FLAC__StreamDecoder *decoder)
 {
-#if FLAC__HAS_OGG
 	decoder->private_->is_ogg = false;
-#endif
 	decoder->private_->read_callback = 0;
 	decoder->private_->seek_callback = 0;
 	decoder->private_->tell_callback = 0;
@@ -1266,9 +1228,6 @@
 	 */
 #if defined _MSC_VER || defined __MINGW32__
 	_setmode(_fileno(stdin), _O_BINARY);
-#elif defined __CYGWIN__
-	/* almost certainly not needed for any modern Cygwin, but let's be safe... */
-	setmode(_fileno(stdin), _O_BINARY);
 #elif defined __EMX__
 	setmode(fileno(stdin), O_BINARY);
 #endif
@@ -1276,12 +1235,13 @@
 	return stdin;
 }
 
-FLAC__bool allocate_output_(FLAC__StreamDecoder *decoder, unsigned size, unsigned channels)
+FLAC__bool allocate_output_(FLAC__StreamDecoder *decoder, uint32_t size, uint32_t channels, uint32_t bps)
 {
-	unsigned i;
+	uint32_t i;
 	FLAC__int32 *tmp;
 
-	if(size <= decoder->private_->output_capacity && channels <= decoder->private_->output_channels)
+	if(size <= decoder->private_->output_capacity && channels <= decoder->private_->output_channels &&
+	   (bps < 32 || decoder->private_->side_subframe != 0))
 		return true;
 
 	/* simply using realloc() is not practical because the number of channels may change mid-stream */
@@ -1297,12 +1257,17 @@
 		}
 	}
 
+	if(0 != decoder->private_->side_subframe) {
+		free(decoder->private_->side_subframe);
+		decoder->private_->side_subframe = 0;
+	}
+
 	for(i = 0; i < channels; i++) {
 		/* WATCHOUT:
-		 * FLAC__lpc_restore_signal_asm_ia32_mmx() requires that the
-		 * output arrays have a buffer of up to 3 zeroes in front
-		 * (at negative indices) for alignment purposes; we use 4
-		 * to keep the data well-aligned.
+		 * FLAC__lpc_restore_signal_asm_ia32_mmx() and ..._intrin_sseN()
+		 * require that the output arrays have a buffer of up to 3 zeroes
+		 * in front (at negative indices) for alignment purposes;
+		 * we use 4 to keep the data well-aligned.
 		 */
 		tmp = safe_malloc_muladd2_(sizeof(FLAC__int32), /*times (*/size, /*+*/4/*)*/);
 		if(tmp == 0) {
@@ -1318,6 +1283,14 @@
 		}
 	}
 
+	if(bps == 32) {
+		decoder->private_->side_subframe = safe_malloc_mul_2op_p(sizeof(FLAC__int64), /*times (*/size);
+		if(decoder->private_->side_subframe == NULL) {
+			decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
+			return false;
+		}
+	}
+
 	decoder->private_->output_capacity = size;
 	decoder->private_->output_channels = channels;
 
@@ -1341,7 +1314,7 @@
 FLAC__bool find_metadata_(FLAC__StreamDecoder *decoder)
 {
 	FLAC__uint32 x;
-	unsigned i, id;
+	uint32_t i, id;
 	FLAC__bool first = true;
 
 	FLAC__ASSERT(FLAC__bitreader_is_consumed_byte_aligned(decoder->private_->input));
@@ -1431,6 +1404,9 @@
 			decoder->private_->metadata_callback(decoder, &decoder->private_->stream_info, decoder->private_->client_data);
 	}
 	else if(type == FLAC__METADATA_TYPE_SEEKTABLE) {
+		/* just in case we already have a seek table, and reading the next one fails: */
+		decoder->private_->has_seek_table = false;
+
 		if(!read_metadata_seektable_(decoder, is_last, length))
 			return false;
 
@@ -1440,7 +1416,7 @@
 	}
 	else {
 		FLAC__bool skip_it = !decoder->private_->metadata_filter[type];
-		unsigned real_length = length;
+		uint32_t real_length = length;
 		FLAC__StreamMetadata block;
 
 		memset(&block, 0, sizeof(block));
@@ -1469,6 +1445,7 @@
 		}
 		else {
 			FLAC__bool ok = true;
+			FLAC__bitreader_set_limit(decoder->private_->input, real_length*8);
 			switch(type) {
 				case FLAC__METADATA_TYPE_PADDING:
 					/* skip the padding bytes */
@@ -1517,6 +1494,16 @@
 						block.data.unknown.data = 0;
 					break;
 			}
+			if(FLAC__bitreader_limit_remaining(decoder->private_->input) > 0) {
+				/* Content in metadata block didn't fit in block length
+				 * We cannot know whether the length or the content was
+				 * corrupt, so stop parsing metadata */
+				send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_BAD_METADATA);
+				if(decoder->protected_->state == FLAC__STREAM_DECODER_READ_METADATA)
+					decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
+				ok = false;
+			}
+			FLAC__bitreader_remove_limit(decoder->private_->input);
 			if(ok && !decoder->private_->is_seeking && decoder->private_->metadata_callback)
 				decoder->private_->metadata_callback(decoder, &block, decoder->private_->client_data);
 
@@ -1539,7 +1526,7 @@
 						free(block.data.vorbis_comment.comments);
 					break;
 				case FLAC__METADATA_TYPE_CUESHEET:
-					if(block.data.cue_sheet.num_tracks > 0)
+					if(block.data.cue_sheet.num_tracks > 0 && 0 != block.data.cue_sheet.tracks)
 						for(i = 0; i < block.data.cue_sheet.num_tracks; i++)
 							if(0 != block.data.cue_sheet.tracks[i].indices)
 								free(block.data.cue_sheet.tracks[i].indices);
@@ -1578,10 +1565,10 @@
 	return true;
 }
 
-FLAC__bool read_metadata_streaminfo_(FLAC__StreamDecoder *decoder, FLAC__bool is_last, unsigned length)
+FLAC__bool read_metadata_streaminfo_(FLAC__StreamDecoder *decoder, FLAC__bool is_last, uint32_t length)
 {
 	FLAC__uint32 x;
-	unsigned bits, used_bits = 0;
+	uint32_t bits, used_bits = 0;
 
 	FLAC__ASSERT(FLAC__bitreader_is_consumed_byte_aligned(decoder->private_->input));
 
@@ -1642,6 +1629,8 @@
 
 	/* skip the rest of the block */
 	FLAC__ASSERT(used_bits % 8 == 0);
+	if (length < (used_bits / 8))
+		return false; /* read_callback_ sets the state for us */
 	length -= (used_bits / 8);
 	if(!FLAC__bitreader_skip_byte_block_aligned_no_crc(decoder->private_->input, length))
 		return false; /* read_callback_ sets the state for us */
@@ -1649,7 +1638,7 @@
 	return true;
 }
 
-FLAC__bool read_metadata_seektable_(FLAC__StreamDecoder *decoder, FLAC__bool is_last, unsigned length)
+FLAC__bool read_metadata_seektable_(FLAC__StreamDecoder *decoder, FLAC__bool is_last, uint32_t length)
 {
 	FLAC__uint32 i, x;
 	FLAC__uint64 xx;
@@ -1691,7 +1680,7 @@
 	return true;
 }
 
-FLAC__bool read_metadata_vorbiscomment_(FLAC__StreamDecoder *decoder, FLAC__StreamMetadata_VorbisComment *obj, unsigned length)
+FLAC__bool read_metadata_vorbiscomment_(FLAC__StreamDecoder *decoder, FLAC__StreamMetadata_VorbisComment *obj, uint32_t length)
 {
 	FLAC__uint32 i;
 
@@ -1728,12 +1717,22 @@
 			return false; /* read_callback_ sets the state for us */
 
 		/* read comments */
+		if (obj->num_comments > 100000) {
+			/* Possibly malicious file. */
+			obj->num_comments = 0;
+			return false;
+		}
 		if (obj->num_comments > 0) {
 			if (0 == (obj->comments = safe_malloc_mul_2op_p(obj->num_comments, /*times*/sizeof(FLAC__StreamMetadata_VorbisComment_Entry)))) {
+				obj->num_comments = 0;
 				decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
 				return false;
 			}
 			for (i = 0; i < obj->num_comments; i++) {
+				/* Initialize here just to make sure. */
+				obj->comments[i].length = 0;
+				obj->comments[i].entry = 0;
+
 				FLAC__ASSERT(FLAC__STREAM_METADATA_VORBIS_COMMENT_ENTRY_LENGTH_LEN == 32);
 				if (length < 4) {
 					obj->num_comments = i;
@@ -1741,12 +1740,12 @@
 				}
 				else
 					length -= 4;
-				if (!FLAC__bitreader_read_uint32_little_endian(decoder->private_->input, &obj->comments[i].length))
+				if (!FLAC__bitreader_read_uint32_little_endian(decoder->private_->input, &obj->comments[i].length)) {
+					obj->num_comments = i;
 					return false; /* read_callback_ sets the state for us */
+				}
 				if (obj->comments[i].length > 0) {
 					if (length < obj->comments[i].length) {
-						obj->comments[i].length = 0;
-						obj->comments[i].entry = 0;
 						obj->num_comments = i;
 						goto skip;
 					}
@@ -1754,23 +1753,32 @@
 						length -= obj->comments[i].length;
 					if (0 == (obj->comments[i].entry = safe_malloc_add_2op_(obj->comments[i].length, /*+*/1))) {
 						decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
+						obj->num_comments = i;
 						return false;
 					}
-					if (!FLAC__bitreader_read_byte_block_aligned_no_crc(decoder->private_->input, obj->comments[i].entry, obj->comments[i].length))
-						return false; /* read_callback_ sets the state for us */
+					memset (obj->comments[i].entry, 0, obj->comments[i].length) ;
+					if (!FLAC__bitreader_read_byte_block_aligned_no_crc(decoder->private_->input, obj->comments[i].entry, obj->comments[i].length)) {
+						/* Current i-th entry is bad, so we delete it. */
+						free (obj->comments[i].entry) ;
+						obj->comments[i].entry = NULL ;
+						obj->num_comments = i;
+						goto skip;
+					}
 					obj->comments[i].entry[obj->comments[i].length] = '\0';
 				}
 				else
 					obj->comments[i].entry = 0;
 			}
 		}
-		else
-			obj->comments = 0;
 	}
 
   skip:
 	if (length > 0) {
-		/* This will only happen on files with invalid data in comments */
+		/* length > 0 can only happen on files with invalid data in comments */
+		if(obj->num_comments < 1) {
+			free(obj->comments);
+			obj->comments = NULL;
+		}
 		if(!FLAC__bitreader_skip_byte_block_aligned_no_crc(decoder->private_->input, length))
 			return false; /* read_callback_ sets the state for us */
 	}
@@ -1870,11 +1878,18 @@
 	/* read type */
 	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_PICTURE_TYPE_LEN))
 		return false; /* read_callback_ sets the state for us */
-	obj->type = x;
+	if(x < FLAC__STREAM_METADATA_PICTURE_TYPE_UNDEFINED)
+		obj->type = x;
+	else
+		obj->type = FLAC__STREAM_METADATA_PICTURE_TYPE_OTHER;
 
 	/* read MIME type */
 	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_PICTURE_MIME_TYPE_LENGTH_LEN))
 		return false; /* read_callback_ sets the state for us */
+	if(FLAC__bitreader_limit_remaining(decoder->private_->input) < x){
+		FLAC__bitreader_limit_invalidate(decoder->private_->input);
+		return false;
+	}
 	if(0 == (obj->mime_type = safe_malloc_add_2op_(x, /*+*/1))) {
 		decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
 		return false;
@@ -1888,6 +1903,10 @@
 	/* read description */
 	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, FLAC__STREAM_METADATA_PICTURE_DESCRIPTION_LENGTH_LEN))
 		return false; /* read_callback_ sets the state for us */
+	if(FLAC__bitreader_limit_remaining(decoder->private_->input) < x){
+		FLAC__bitreader_limit_invalidate(decoder->private_->input);
+		return false;
+	}
 	if(0 == (obj->description = safe_malloc_add_2op_(x, /*+*/1))) {
 		decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
 		return false;
@@ -1917,6 +1936,10 @@
 	/* read data */
 	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &(obj->data_length), FLAC__STREAM_METADATA_PICTURE_DATA_LENGTH_LEN))
 		return false; /* read_callback_ sets the state for us */
+	if(FLAC__bitreader_limit_remaining(decoder->private_->input) < obj->data_length){
+		FLAC__bitreader_limit_invalidate(decoder->private_->input);
+		return false;
+	}
 	if(0 == (obj->data = safe_malloc_(obj->data_length))) {
 		decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
 		return false;
@@ -1932,7 +1955,7 @@
 FLAC__bool skip_id3v2_tag_(FLAC__StreamDecoder *decoder)
 {
 	FLAC__uint32 x;
-	unsigned i, skip;
+	uint32_t i, skip;
 
 	/* skip the version and flags bytes */
 	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, 24))
@@ -1956,15 +1979,6 @@
 	FLAC__uint32 x;
 	FLAC__bool first = true;
 
-	/* If we know the total number of samples in the stream, stop if we've read that many. */
-	/* This will stop us, for example, from wasting time trying to sync on an ID3V1 tag. */
-	if(FLAC__stream_decoder_get_total_samples(decoder) > 0) {
-		if(decoder->private_->samples_decoded >= FLAC__stream_decoder_get_total_samples(decoder)) {
-			decoder->protected_->state = FLAC__STREAM_DECODER_END_OF_STREAM;
-			return true;
-		}
-	}
-
 	/* make sure we're byte aligned */
 	if(!FLAC__bitreader_is_consumed_byte_aligned(decoder->private_->input)) {
 		if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, FLAC__bitreader_bits_left_for_byte_alignment(decoder->private_->input)))
@@ -1994,6 +2008,12 @@
 			else if(x >> 1 == 0x7c) { /* MAGIC NUMBER for the last 6 sync bits and reserved 7th bit */
 				decoder->private_->header_warmup[1] = (FLAC__byte)x;
 				decoder->protected_->state = FLAC__STREAM_DECODER_READ_FRAME;
+
+				/* Save location so we can rewind in case the frame turns
+				 * out to be invalid after the header */
+				FLAC__bitreader_set_framesync_location(decoder->private_->input);
+				if(!FLAC__stream_decoder_get_decode_position(decoder, &decoder->private_->last_seen_framesync))
+					decoder->private_->last_seen_framesync = 0;
 				return true;
 			}
 		}
@@ -2008,13 +2028,13 @@
 
 FLAC__bool read_frame_(FLAC__StreamDecoder *decoder, FLAC__bool *got_a_frame, FLAC__bool do_full_decode)
 {
-	unsigned channel;
-	unsigned i;
-	FLAC__int32 mid, side;
-	unsigned frame_crc; /* the one we calculate from the input stream */
+	uint32_t channel;
+	uint32_t i;
+	uint32_t frame_crc; /* the one we calculate from the input stream */
 	FLAC__uint32 x;
 
 	*got_a_frame = false;
+	decoder->private_->side_subframe_in_use = false;
 
 	/* init the CRC */
 	frame_crc = 0;
@@ -2026,13 +2046,13 @@
 		return false;
 	if(decoder->protected_->state == FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC) /* means we didn't sync on a valid header */
 		return true;
-	if(!allocate_output_(decoder, decoder->private_->frame.header.blocksize, decoder->private_->frame.header.channels))
+	if(!allocate_output_(decoder, decoder->private_->frame.header.blocksize, decoder->private_->frame.header.channels, decoder->private_->frame.header.bits_per_sample))
 		return false;
 	for(channel = 0; channel < decoder->private_->frame.header.channels; channel++) {
 		/*
 		 * first figure the correct bits-per-sample of the subframe
 		 */
-		unsigned bps = decoder->private_->frame.header.bits_per_sample;
+		uint32_t bps = decoder->private_->frame.header.bits_per_sample;
 		switch(decoder->private_->frame.header.channel_assignment) {
 			case FLAC__CHANNEL_ASSIGNMENT_INDEPENDENT:
 				/* no adjustment needed */
@@ -2058,95 +2078,171 @@
 		/*
 		 * now read it
 		 */
-		if(!read_subframe_(decoder, channel, bps, do_full_decode))
-			return false;
-		if(decoder->protected_->state == FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC) /* means bad sync or got corruption */
-			return true;
+		if(!read_subframe_(decoder, channel, bps, do_full_decode)){
+			/* read_callback_ sets the state for us */
+			if(decoder->protected_->state == FLAC__STREAM_DECODER_END_OF_STREAM)
+				break;
+			else
+				return false;
+		}
 	}
-	if(!read_zero_padding_(decoder))
-		return false;
-	if(decoder->protected_->state == FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC) /* means bad sync or got corruption (i.e. "zero bits" were not all zeroes) */
-		return true;
+
+	if(decoder->protected_->state != FLAC__STREAM_DECODER_END_OF_STREAM)
+		if(!read_zero_padding_(decoder))
+			return false;
 
 	/*
 	 * Read the frame CRC-16 from the footer and check
 	 */
-	frame_crc = FLAC__bitreader_get_read_crc16(decoder->private_->input);
-	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, FLAC__FRAME_FOOTER_CRC_LEN))
-		return false; /* read_callback_ sets the state for us */
-	if(frame_crc == x) {
+	if(decoder->protected_->state == FLAC__STREAM_DECODER_READ_FRAME) {
+		frame_crc = FLAC__bitreader_get_read_crc16(decoder->private_->input);
+		if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, FLAC__FRAME_FOOTER_CRC_LEN)) {
+			/* read_callback_ sets the state for us */
+			if(decoder->protected_->state != FLAC__STREAM_DECODER_END_OF_STREAM)
+				return false;
+		}
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+	}
+	if(decoder->protected_->state == FLAC__STREAM_DECODER_READ_FRAME && frame_crc == x) {
+#endif
 		if(do_full_decode) {
 			/* Undo any special channel coding */
-			switch(decoder->private_->frame.header.channel_assignment) {
-				case FLAC__CHANNEL_ASSIGNMENT_INDEPENDENT:
-					/* do nothing */
-					break;
-				case FLAC__CHANNEL_ASSIGNMENT_LEFT_SIDE:
-					FLAC__ASSERT(decoder->private_->frame.header.channels == 2);
-					for(i = 0; i < decoder->private_->frame.header.blocksize; i++)
-						decoder->private_->output[1][i] = decoder->private_->output[0][i] - decoder->private_->output[1][i];
-					break;
-				case FLAC__CHANNEL_ASSIGNMENT_RIGHT_SIDE:
-					FLAC__ASSERT(decoder->private_->frame.header.channels == 2);
-					for(i = 0; i < decoder->private_->frame.header.blocksize; i++)
-						decoder->private_->output[0][i] += decoder->private_->output[1][i];
-					break;
-				case FLAC__CHANNEL_ASSIGNMENT_MID_SIDE:
-					FLAC__ASSERT(decoder->private_->frame.header.channels == 2);
-					for(i = 0; i < decoder->private_->frame.header.blocksize; i++) {
-#if 1
-						mid = decoder->private_->output[0][i];
-						side = decoder->private_->output[1][i];
-						mid <<= 1;
-						mid |= (side & 1); /* i.e. if 'side' is odd... */
-						decoder->private_->output[0][i] = (mid + side) >> 1;
-						decoder->private_->output[1][i] = (mid - side) >> 1;
-#else
-						/* OPT: without 'side' temp variable */
-						mid = (decoder->private_->output[0][i] << 1) | (decoder->private_->output[1][i] & 1); /* i.e. if 'side' is odd... */
-						decoder->private_->output[0][i] = (mid + decoder->private_->output[1][i]) >> 1;
-						decoder->private_->output[1][i] = (mid - decoder->private_->output[1][i]) >> 1;
-#endif
+			undo_channel_coding(decoder);
+			/* Check whether decoded data actually fits bps */
+			for(channel = 0; channel < decoder->private_->frame.header.channels; channel++) {
+				for(i = 0; i < decoder->private_->frame.header.blocksize; i++) {
+					int shift_bits = 32 - decoder->private_->frame.header.bits_per_sample;
+					/* Check whether the shift_bits MSBs are 'empty' by comparing against the INT32 range limits shifted down by shift_bits */
+					if((decoder->private_->output[channel][i] < (INT32_MIN >> shift_bits)) ||
+					   (decoder->private_->output[channel][i] > (INT32_MAX >> shift_bits))) {
+						/* Bad frame, emit error */
+						send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_FRAME_CRC_MISMATCH);
+						decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
+						break;
 					}
-					break;
-				default:
-					FLAC__ASSERT(0);
-					break;
+				}
 			}
 		}
 	}
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+	else if (decoder->protected_->state == FLAC__STREAM_DECODER_READ_FRAME) {
+		/* Bad frame, emit error */
+		send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_FRAME_CRC_MISMATCH);
+		decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
+	}
+#endif
+
+	/* Check whether frames are missing, if so, add silence to compensate */
+	if(decoder->private_->last_frame_is_set && decoder->protected_->state == FLAC__STREAM_DECODER_READ_FRAME && !decoder->private_->is_seeking && do_full_decode) {
+		FLAC__ASSERT(decoder->private_->frame.header.number_type == FLAC__FRAME_NUMBER_TYPE_SAMPLE_NUMBER);
+		FLAC__ASSERT(decoder->private_->last_frame.header.number_type == FLAC__FRAME_NUMBER_TYPE_SAMPLE_NUMBER);
+		if(decoder->private_->last_frame.header.number.sample_number + decoder->private_->last_frame.header.blocksize < decoder->private_->frame.header.number.sample_number) {
+			uint32_t padding_samples_needed = decoder->private_->frame.header.number.sample_number - (decoder->private_->last_frame.header.number.sample_number + decoder->private_->last_frame.header.blocksize);
+
+			/* Do some extra validation to ensure the last frame and current frame
+			 * headers are both valid before adding silence in between.
+			 * Technically both frames could be valid with differing sample_rates,
+			 * channels and bits_per_sample, but that is quite rare */
+			if(decoder->private_->last_frame.header.sample_rate == decoder->private_->frame.header.sample_rate &&
+			   decoder->private_->last_frame.header.channels == decoder->private_->frame.header.channels &&
+			   decoder->private_->last_frame.header.bits_per_sample == decoder->private_->frame.header.bits_per_sample &&
+			   decoder->private_->last_frame.header.blocksize >= 16) {
+
+				FLAC__Frame empty_frame;
+				empty_frame.header = decoder->private_->last_frame.header;
+				empty_frame.footer.crc = 0;
+				/* No repairs larger than 5 seconds or 50 frames are made, to not
+				 * unexpectedly create enormous files when one of the headers was
+				 * corrupt after all */
+				if(padding_samples_needed > (5*empty_frame.header.sample_rate))
+					padding_samples_needed = 5*empty_frame.header.sample_rate;
+				if(padding_samples_needed > (50*empty_frame.header.blocksize))
+					padding_samples_needed = 50*empty_frame.header.blocksize;
+				while(padding_samples_needed){
+					empty_frame.header.number.sample_number += empty_frame.header.blocksize;
+					if(padding_samples_needed < empty_frame.header.blocksize)
+						empty_frame.header.blocksize = padding_samples_needed;
+					padding_samples_needed -= empty_frame.header.blocksize;
+					decoder->protected_->blocksize = empty_frame.header.blocksize;
+
+					FLAC__ASSERT(empty_frame.header.number_type == FLAC__FRAME_NUMBER_TYPE_SAMPLE_NUMBER);
+					decoder->private_->samples_decoded = empty_frame.header.number.sample_number + empty_frame.header.blocksize;
+
+					if(!allocate_output_(decoder, empty_frame.header.blocksize, empty_frame.header.channels, empty_frame.header.bits_per_sample))
+						return false;
+
+					for(channel = 0; channel < empty_frame.header.channels; channel++) {
+						empty_frame.subframes[channel].type = FLAC__SUBFRAME_TYPE_CONSTANT;
+						empty_frame.subframes[channel].data.constant.value = 0;
+						empty_frame.subframes[channel].wasted_bits = 0;
+						memset(decoder->private_->output[channel], 0, sizeof(FLAC__int32) * empty_frame.header.blocksize);
+					}
+
+					if(write_audio_frame_to_client_(decoder, &empty_frame, (const FLAC__int32 * const *)decoder->private_->output) != FLAC__STREAM_DECODER_WRITE_STATUS_CONTINUE) {
+						decoder->protected_->state = FLAC__STREAM_DECODER_ABORTED;
+						return false;
+					}
+				}
+			}
+		}
+	}
+
+	if(decoder->protected_->state == FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC || decoder->protected_->state == FLAC__STREAM_DECODER_END_OF_STREAM) {
+		/* Got corruption, rewind if possible. If seeking back isn't available the decoder
+		* continues anyway; a seek error or a failed bitreader clear returns false */
+		if(!FLAC__bitreader_rewind_to_after_last_seen_framesync(decoder->private_->input)){
+#ifndef NDEBUG
+			fprintf(stderr, "Rewinding, seeking necessary\n");
+#endif
+			if(decoder->private_->seek_callback && decoder->private_->last_seen_framesync){
+				/* Last framesync isn't in bitreader anymore, rewind with seek if possible */
+#ifndef NDEBUG
+				FLAC__uint64 current_decode_position;
+				if(FLAC__stream_decoder_get_decode_position(decoder, &current_decode_position))
+					fprintf(stderr, "Bitreader was %" PRIu64 " bytes short\n", current_decode_position-decoder->private_->last_seen_framesync);
+#endif
+				if(decoder->private_->seek_callback(decoder, decoder->private_->last_seen_framesync, decoder->private_->client_data) == FLAC__STREAM_DECODER_SEEK_STATUS_ERROR) {
+					decoder->protected_->state = FLAC__STREAM_DECODER_SEEK_ERROR;
+					return false;
+				}
+				if(!FLAC__bitreader_clear(decoder->private_->input)) {
+					decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
+					return false;
+				}
+			}
+		}
+#ifndef NDEBUG
+		else{
+			fprintf(stderr, "Rewinding, seeking not necessary\n");
+		}
+#endif
+	}
 	else {
-		/* Bad frame, emit error and zero the output signal */
-		send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_FRAME_CRC_MISMATCH);
+		*got_a_frame = true;
+
+		/* we wait to update fixed_block_size until here, when we're sure we've got a proper frame and hence a correct blocksize */
+		if(decoder->private_->next_fixed_block_size)
+			decoder->private_->fixed_block_size = decoder->private_->next_fixed_block_size;
+
+		/* put the latest values into the public section of the decoder instance */
+		decoder->protected_->channels = decoder->private_->frame.header.channels;
+		decoder->protected_->channel_assignment = decoder->private_->frame.header.channel_assignment;
+		decoder->protected_->bits_per_sample = decoder->private_->frame.header.bits_per_sample;
+		decoder->protected_->sample_rate = decoder->private_->frame.header.sample_rate;
+		decoder->protected_->blocksize = decoder->private_->frame.header.blocksize;
+
+		FLAC__ASSERT(decoder->private_->frame.header.number_type == FLAC__FRAME_NUMBER_TYPE_SAMPLE_NUMBER);
+		decoder->private_->samples_decoded = decoder->private_->frame.header.number.sample_number + decoder->private_->frame.header.blocksize;
+
+		/* write it */
 		if(do_full_decode) {
-			for(channel = 0; channel < decoder->private_->frame.header.channels; channel++) {
-				memset(decoder->private_->output[channel], 0, sizeof(FLAC__int32) * decoder->private_->frame.header.blocksize);
+			if(write_audio_frame_to_client_(decoder, &decoder->private_->frame, (const FLAC__int32 * const *)decoder->private_->output) != FLAC__STREAM_DECODER_WRITE_STATUS_CONTINUE) {
+				decoder->protected_->state = FLAC__STREAM_DECODER_ABORTED;
+				return false;
 			}
 		}
 	}
 
-	*got_a_frame = true;
-
-	/* we wait to update fixed_block_size until here, when we're sure we've got a proper frame and hence a correct blocksize */
-	if(decoder->private_->next_fixed_block_size)
-		decoder->private_->fixed_block_size = decoder->private_->next_fixed_block_size;
-
-	/* put the latest values into the public section of the decoder instance */
-	decoder->protected_->channels = decoder->private_->frame.header.channels;
-	decoder->protected_->channel_assignment = decoder->private_->frame.header.channel_assignment;
-	decoder->protected_->bits_per_sample = decoder->private_->frame.header.bits_per_sample;
-	decoder->protected_->sample_rate = decoder->private_->frame.header.sample_rate;
-	decoder->protected_->blocksize = decoder->private_->frame.header.blocksize;
-
-	FLAC__ASSERT(decoder->private_->frame.header.number_type == FLAC__FRAME_NUMBER_TYPE_SAMPLE_NUMBER);
-	decoder->private_->samples_decoded = decoder->private_->frame.header.number.sample_number + decoder->private_->frame.header.blocksize;
-
-	/* write it */
-	if(do_full_decode) {
-		if(write_audio_frame_to_client_(decoder, &decoder->private_->frame, (const FLAC__int32 * const *)decoder->private_->output) != FLAC__STREAM_DECODER_WRITE_STATUS_CONTINUE)
-			return false;
-	}
-
 	decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
 	return true;
 }
@@ -2155,9 +2251,9 @@
 {
 	FLAC__uint32 x;
 	FLAC__uint64 xx;
-	unsigned i, blocksize_hint = 0, sample_rate_hint = 0;
+	uint32_t i, blocksize_hint = 0, sample_rate_hint = 0;
 	FLAC__byte crc8, raw_header[16]; /* MAGIC NUMBER based on the maximum frame header size, including CRC */
-	unsigned raw_header_len;
+	uint32_t raw_header_len;
 	FLAC__bool is_unparseable = false;
 
 	FLAC__ASSERT(FLAC__bitreader_is_consumed_byte_aligned(decoder->private_->input));
@@ -2292,7 +2388,7 @@
 			FLAC__ASSERT(0);
 	}
 
-	x = (unsigned)(raw_header[3] >> 4);
+	x = (uint32_t)(raw_header[3] >> 4);
 	if(x & 8) {
 		decoder->private_->frame.header.channels = 2;
 		switch(x & 7) {
@@ -2311,11 +2407,11 @@
 		}
 	}
 	else {
-		decoder->private_->frame.header.channels = (unsigned)x + 1;
+		decoder->private_->frame.header.channels = (uint32_t)x + 1;
 		decoder->private_->frame.header.channel_assignment = FLAC__CHANNEL_ASSIGNMENT_INDEPENDENT;
 	}
 
-	switch(x = (unsigned)(raw_header[3] & 0x0e) >> 1) {
+	switch(x = (uint32_t)(raw_header[3] & 0x0e) >> 1) {
 		case 0:
 			if(decoder->private_->has_stream_info)
 				decoder->private_->frame.header.bits_per_sample = decoder->private_->stream_info.data.stream_info.bits_per_sample;
@@ -2328,6 +2424,9 @@
 		case 2:
 			decoder->private_->frame.header.bits_per_sample = 12;
 			break;
+		case 3:
+			is_unparseable = true;
+			break;
 		case 4:
 			decoder->private_->frame.header.bits_per_sample = 16;
 			break;
@@ -2337,18 +2436,19 @@
 		case 6:
 			decoder->private_->frame.header.bits_per_sample = 24;
 			break;
-		case 3:
 		case 7:
-			is_unparseable = true;
+			decoder->private_->frame.header.bits_per_sample = 32;
 			break;
 		default:
 			FLAC__ASSERT(0);
 			break;
 	}
 
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
 	/* check to make sure that reserved bit is 0 */
 	if(raw_header[3] & 0x01) /* MAGIC NUMBER */
 		is_unparseable = true;
+#endif
 
 	/* read the frame's starting sample number (or frame number as the case may be) */
 	if(
@@ -2420,11 +2520,13 @@
 		return false; /* read_callback_ sets the state for us */
 	crc8 = (FLAC__byte)x;
 
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
 	if(FLAC__crc8(raw_header, raw_header_len) != crc8) {
 		send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_BAD_HEADER);
 		decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
 		return true;
 	}
+#endif
 
 	/* calculate the sample number from the frame number if needed */
 	decoder->private_->next_fixed_block_size = 0;
@@ -2460,11 +2562,11 @@
 	return true;
 }
 
-FLAC__bool read_subframe_(FLAC__StreamDecoder *decoder, unsigned channel, unsigned bps, FLAC__bool do_full_decode)
+FLAC__bool read_subframe_(FLAC__StreamDecoder *decoder, uint32_t channel, uint32_t bps, FLAC__bool do_full_decode)
 {
 	FLAC__uint32 x;
 	FLAC__bool wasted_bits;
-	unsigned i;
+	uint32_t i;
 
 	if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &x, 8)) /* MAGIC NUMBER */
 		return false; /* read_callback_ sets the state for us */
@@ -2473,10 +2575,12 @@
 	x &= 0xfe;
 
 	if(wasted_bits) {
-		unsigned u;
+		uint32_t u;
 		if(!FLAC__bitreader_read_unary_unsigned(decoder->private_->input, &u))
 			return false; /* read_callback_ sets the state for us */
 		decoder->private_->frame.subframes[channel].wasted_bits = u+1;
+		if (decoder->private_->frame.subframes[channel].wasted_bits >= bps)
+			return false;
 		bps -= decoder->private_->frame.subframes[channel].wasted_bits;
 	}
 	else
@@ -2504,7 +2608,13 @@
 		return true;
 	}
 	else if(x <= 24) {
-		if(!read_subframe_fixed_(decoder, channel, bps, (x>>1)&7, do_full_decode))
+		uint32_t predictor_order = (x>>1)&7;
+		if(decoder->private_->frame.header.blocksize <= predictor_order){
+			send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC);
+			decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
+			return true;
+		}
+		if(!read_subframe_fixed_(decoder, channel, bps, predictor_order, do_full_decode))
 			return false;
 		if(decoder->protected_->state == FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC) /* means bad sync or got corruption */
 			return true;
@@ -2515,7 +2625,13 @@
 		return true;
 	}
 	else {
-		if(!read_subframe_lpc_(decoder, channel, bps, ((x>>1)&31)+1, do_full_decode))
+		uint32_t predictor_order = ((x>>1)&31)+1;
+		if(decoder->private_->frame.header.blocksize <= predictor_order){
+			send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC);
+			decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
+			return true;
+		}
+		if(!read_subframe_lpc_(decoder, channel, bps, predictor_order, do_full_decode))
 			return false;
 		if(decoder->protected_->state == FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC) /* means bad sync or got corruption */
 			return true;
@@ -2523,42 +2639,63 @@
 
 	if(wasted_bits && do_full_decode) {
 		x = decoder->private_->frame.subframes[channel].wasted_bits;
-		for(i = 0; i < decoder->private_->frame.header.blocksize; i++)
-			decoder->private_->output[channel][i] <<= x;
+		if((bps + x) < 33) {
+			for(i = 0; i < decoder->private_->frame.header.blocksize; i++) {
+				uint32_t val = decoder->private_->output[channel][i];
+				decoder->private_->output[channel][i] = (val << x);
+			}
+		}
+		else {
+			/* When there are wasted bits, bps is never 33 and so
+			 * side_subframe is never already in use */
+			FLAC__ASSERT(!decoder->private_->side_subframe_in_use);
+			decoder->private_->side_subframe_in_use = true;
+			for(i = 0; i < decoder->private_->frame.header.blocksize; i++) {
+				uint64_t val = decoder->private_->output[channel][i];
+				decoder->private_->side_subframe[i] = (val << x);
+			}
+		}
 	}
 
 	return true;
 }
 
-FLAC__bool read_subframe_constant_(FLAC__StreamDecoder *decoder, unsigned channel, unsigned bps, FLAC__bool do_full_decode)
+FLAC__bool read_subframe_constant_(FLAC__StreamDecoder *decoder, uint32_t channel, uint32_t bps, FLAC__bool do_full_decode)
 {
 	FLAC__Subframe_Constant *subframe = &decoder->private_->frame.subframes[channel].data.constant;
-	FLAC__int32 x;
-	unsigned i;
-	FLAC__int32 *output = decoder->private_->output[channel];
+	FLAC__int64 x;
+	uint32_t i;
 
 	decoder->private_->frame.subframes[channel].type = FLAC__SUBFRAME_TYPE_CONSTANT;
 
-	if(!FLAC__bitreader_read_raw_int32(decoder->private_->input, &x, bps))
+	if(!FLAC__bitreader_read_raw_int64(decoder->private_->input, &x, bps))
 		return false; /* read_callback_ sets the state for us */
 
 	subframe->value = x;
 
 	/* decode the subframe */
 	if(do_full_decode) {
-		for(i = 0; i < decoder->private_->frame.header.blocksize; i++)
-			output[i] = x;
+		if(bps <= 32) {
+			FLAC__int32 *output = decoder->private_->output[channel];
+			for(i = 0; i < decoder->private_->frame.header.blocksize; i++)
+				output[i] = x;
+		} else {
+			FLAC__int64 *output = decoder->private_->side_subframe;
+			decoder->private_->side_subframe_in_use = true;
+			for(i = 0; i < decoder->private_->frame.header.blocksize; i++)
+				output[i] = x;
+		}
 	}
 
 	return true;
 }
 
-FLAC__bool read_subframe_fixed_(FLAC__StreamDecoder *decoder, unsigned channel, unsigned bps, const unsigned order, FLAC__bool do_full_decode)
+FLAC__bool read_subframe_fixed_(FLAC__StreamDecoder *decoder, uint32_t channel, uint32_t bps, const uint32_t order, FLAC__bool do_full_decode)
 {
 	FLAC__Subframe_Fixed *subframe = &decoder->private_->frame.subframes[channel].data.fixed;
-	FLAC__int32 i32;
+	FLAC__int64 i64;
 	FLAC__uint32 u32;
-	unsigned u;
+	uint32_t u;
 
 	decoder->private_->frame.subframes[channel].type = FLAC__SUBFRAME_TYPE_FIXED;
 
@@ -2567,9 +2704,9 @@
 
 	/* read warm-up samples */
 	for(u = 0; u < order; u++) {
-		if(!FLAC__bitreader_read_raw_int32(decoder->private_->input, &i32, bps))
+		if(!FLAC__bitreader_read_raw_int64(decoder->private_->input, &i64, bps))
 			return false; /* read_callback_ sets the state for us */
-		subframe->warmup[u] = i32;
+		subframe->warmup[u] = i64;
 	}
 
 	/* read entropy coding method info */
@@ -2581,6 +2718,12 @@
 		case FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2:
 			if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &u32, FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ORDER_LEN))
 				return false; /* read_callback_ sets the state for us */
+			if((decoder->private_->frame.header.blocksize >> u32 < order) ||
+			   (decoder->private_->frame.header.blocksize % (1 << u32) > 0)) {
+				send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC);
+				decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
+				return true;
+			}
 			subframe->entropy_coding_method.data.partitioned_rice.order = u32;
 			subframe->entropy_coding_method.data.partitioned_rice.contents = &decoder->private_->partitioned_rice_contents[channel];
 			break;
@@ -2603,19 +2746,32 @@
 
 	/* decode the subframe */
 	if(do_full_decode) {
-		memcpy(decoder->private_->output[channel], subframe->warmup, sizeof(FLAC__int32) * order);
-		FLAC__fixed_restore_signal(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, order, decoder->private_->output[channel]+order);
+		if(bps < 33){
+			uint32_t i;
+			for(i = 0; i < order; i++)
+				decoder->private_->output[channel][i] = subframe->warmup[i];
+			if(bps+order <= 32)
+				FLAC__fixed_restore_signal(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, order, decoder->private_->output[channel]+order);
+			else
+				FLAC__fixed_restore_signal_wide(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, order, decoder->private_->output[channel]+order);
+		}
+		else {
+			decoder->private_->side_subframe_in_use = true;
+			memcpy(decoder->private_->side_subframe, subframe->warmup, sizeof(FLAC__int64) * order);
+			FLAC__fixed_restore_signal_wide_33bit(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, order, decoder->private_->side_subframe+order);
+		}
 	}
 
 	return true;
 }
 
-FLAC__bool read_subframe_lpc_(FLAC__StreamDecoder *decoder, unsigned channel, unsigned bps, const unsigned order, FLAC__bool do_full_decode)
+FLAC__bool read_subframe_lpc_(FLAC__StreamDecoder *decoder, uint32_t channel, uint32_t bps, const uint32_t order, FLAC__bool do_full_decode)
 {
 	FLAC__Subframe_LPC *subframe = &decoder->private_->frame.subframes[channel].data.lpc;
 	FLAC__int32 i32;
+	FLAC__int64 i64;
 	FLAC__uint32 u32;
-	unsigned u;
+	uint32_t u;
 
 	decoder->private_->frame.subframes[channel].type = FLAC__SUBFRAME_TYPE_LPC;
 
@@ -2624,9 +2780,9 @@
 
 	/* read warm-up samples */
 	for(u = 0; u < order; u++) {
-		if(!FLAC__bitreader_read_raw_int32(decoder->private_->input, &i32, bps))
+		if(!FLAC__bitreader_read_raw_int64(decoder->private_->input, &i64, bps))
 			return false; /* read_callback_ sets the state for us */
-		subframe->warmup[u] = i32;
+		subframe->warmup[u] = i64;
 	}
 
 	/* read qlp coeff precision */
@@ -2642,6 +2798,11 @@
 	/* read qlp shift */
 	if(!FLAC__bitreader_read_raw_int32(decoder->private_->input, &i32, FLAC__SUBFRAME_LPC_QLP_SHIFT_LEN))
 		return false; /* read_callback_ sets the state for us */
+	if(i32 < 0) {
+		send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC);
+		decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
+		return true;
+	}
 	subframe->quantization_level = i32;
 
 	/* read quantized lp coefficiencts */
@@ -2660,6 +2821,12 @@
 		case FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2:
 			if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &u32, FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ORDER_LEN))
 				return false; /* read_callback_ sets the state for us */
+			if((decoder->private_->frame.header.blocksize >> u32 < order) ||
+			   (decoder->private_->frame.header.blocksize % (1 << u32) > 0)) {
+				send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC);
+				decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
+				return true;
+			}
 			subframe->entropy_coding_method.data.partitioned_rice.order = u32;
 			subframe->entropy_coding_method.data.partitioned_rice.contents = &decoder->private_->partitioned_rice_contents[channel];
 			break;
@@ -2682,72 +2849,78 @@
 
 	/* decode the subframe */
 	if(do_full_decode) {
-		memcpy(decoder->private_->output[channel], subframe->warmup, sizeof(FLAC__int32) * order);
-		/*@@@@@@ technically not pessimistic enough, should be more like
-		if( (FLAC__uint64)order * ((((FLAC__uint64)1)<<bps)-1) * ((1<<subframe->qlp_coeff_precision)-1) < (((FLAC__uint64)-1) << 32) )
-		*/
-		if(bps + subframe->qlp_coeff_precision + FLAC__bitmath_ilog2(order) <= 32)
-			if(bps <= 16 && subframe->qlp_coeff_precision <= 16)
-				decoder->private_->local_lpc_restore_signal_16bit(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order);
+		if(bps <= 32) {
+			uint32_t i;
+			for(i = 0; i < order; i++)
+				decoder->private_->output[channel][i] = subframe->warmup[i];
+			if(FLAC__lpc_max_residual_bps(bps, subframe->qlp_coeff, order, subframe->quantization_level) <= 32 &&
+			   FLAC__lpc_max_prediction_before_shift_bps(bps, subframe->qlp_coeff, order) <= 32)
+				FLAC__lpc_restore_signal(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order);
 			else
-				decoder->private_->local_lpc_restore_signal(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order);
-		else
-			decoder->private_->local_lpc_restore_signal_64bit(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order);
+				FLAC__lpc_restore_signal_wide(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->output[channel]+order);
+		}
+		else {
+			decoder->private_->side_subframe_in_use = true;
+			memcpy(decoder->private_->side_subframe, subframe->warmup, sizeof(FLAC__int64) * order);
+			FLAC__lpc_restore_signal_wide_33bit(decoder->private_->residual[channel], decoder->private_->frame.header.blocksize-order, subframe->qlp_coeff, order, subframe->quantization_level, decoder->private_->side_subframe+order);
+		}
 	}
 
 	return true;
 }
 
-FLAC__bool read_subframe_verbatim_(FLAC__StreamDecoder *decoder, unsigned channel, unsigned bps, FLAC__bool do_full_decode)
+FLAC__bool read_subframe_verbatim_(FLAC__StreamDecoder *decoder, uint32_t channel, uint32_t bps, FLAC__bool do_full_decode)
 {
 	FLAC__Subframe_Verbatim *subframe = &decoder->private_->frame.subframes[channel].data.verbatim;
-	FLAC__int32 x, *residual = decoder->private_->residual[channel];
-	unsigned i;
+	uint32_t i;
 
 	decoder->private_->frame.subframes[channel].type = FLAC__SUBFRAME_TYPE_VERBATIM;
 
-	subframe->data = residual;
+	if(bps < 33) {
+		FLAC__int32 x, *residual = decoder->private_->residual[channel];
 
-	for(i = 0; i < decoder->private_->frame.header.blocksize; i++) {
-		if(!FLAC__bitreader_read_raw_int32(decoder->private_->input, &x, bps))
-			return false; /* read_callback_ sets the state for us */
-		residual[i] = x;
+		subframe->data_type = FLAC__VERBATIM_SUBFRAME_DATA_TYPE_INT32;
+		subframe->data.int32 = residual;
+
+		for(i = 0; i < decoder->private_->frame.header.blocksize; i++) {
+			if(!FLAC__bitreader_read_raw_int32(decoder->private_->input, &x, bps))
+				return false; /* read_callback_ sets the state for us */
+			residual[i] = x;
+		}
+
+		/* decode the subframe */
+		if(do_full_decode)
+			memcpy(decoder->private_->output[channel], subframe->data.int32, sizeof(FLAC__int32) * decoder->private_->frame.header.blocksize);
 	}
+	else {
+		FLAC__int64 x, *side = decoder->private_->side_subframe;
 
-	/* decode the subframe */
-	if(do_full_decode)
-		memcpy(decoder->private_->output[channel], subframe->data, sizeof(FLAC__int32) * decoder->private_->frame.header.blocksize);
+		subframe->data_type = FLAC__VERBATIM_SUBFRAME_DATA_TYPE_INT64;
+		subframe->data.int64 = side;
+		decoder->private_->side_subframe_in_use = true;
+
+		for(i = 0; i < decoder->private_->frame.header.blocksize; i++) {
+			if(!FLAC__bitreader_read_raw_int64(decoder->private_->input, &x, bps))
+				return false; /* read_callback_ sets the state for us */
+			side[i] = x;
+		}
+	}
 
 	return true;
 }
 
-FLAC__bool read_residual_partitioned_rice_(FLAC__StreamDecoder *decoder, unsigned predictor_order, unsigned partition_order, FLAC__EntropyCodingMethod_PartitionedRiceContents *partitioned_rice_contents, FLAC__int32 *residual, FLAC__bool is_extended)
+FLAC__bool read_residual_partitioned_rice_(FLAC__StreamDecoder *decoder, uint32_t predictor_order, uint32_t partition_order, FLAC__EntropyCodingMethod_PartitionedRiceContents *partitioned_rice_contents, FLAC__int32 *residual, FLAC__bool is_extended)
 {
 	FLAC__uint32 rice_parameter;
 	int i;
-	unsigned partition, sample, u;
-	const unsigned partitions = 1u << partition_order;
-	const unsigned partition_samples = partition_order > 0? decoder->private_->frame.header.blocksize >> partition_order : decoder->private_->frame.header.blocksize - predictor_order;
-	const unsigned plen = is_extended? FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_PARAMETER_LEN : FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_PARAMETER_LEN;
-	const unsigned pesc = is_extended? FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_ESCAPE_PARAMETER : FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ESCAPE_PARAMETER;
+	uint32_t partition, sample, u;
+	const uint32_t partitions = 1u << partition_order;
+	const uint32_t partition_samples = decoder->private_->frame.header.blocksize >> partition_order;
+	const uint32_t plen = is_extended? FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_PARAMETER_LEN : FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_PARAMETER_LEN;
+	const uint32_t pesc = is_extended? FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_ESCAPE_PARAMETER : FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ESCAPE_PARAMETER;
 
-	/* sanity checks */
-	if(partition_order == 0) {
-		if(decoder->private_->frame.header.blocksize < predictor_order) {
-			send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC);
-			decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
-			/* We have received a potentially malicious bit stream. All we can do is error out to avoid a heap overflow. */
-			return false;
-		}
-	}
-	else {
-		if(partition_samples < predictor_order) {
-			send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC);
-			decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
-			/* We have received a potentially malicious bit stream. All we can do is error out to avoid a heap overflow. */
-			return false;
-		}
-	}
+	/* invalid predictor and partition orders must be handled in the callers */
+	FLAC__ASSERT(partition_order > 0? partition_samples >= predictor_order : decoder->private_->frame.header.blocksize >= predictor_order);
 
 	if(!FLAC__format_entropy_coding_method_partitioned_rice_contents_ensure_size(partitioned_rice_contents, flac_max(6u, partition_order))) {
 		decoder->protected_->state = FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR;
@@ -2761,19 +2934,34 @@
 		partitioned_rice_contents->parameters[partition] = rice_parameter;
 		if(rice_parameter < pesc) {
 			partitioned_rice_contents->raw_bits[partition] = 0;
-			u = (partition_order == 0 || partition > 0)? partition_samples : partition_samples - predictor_order;
-			if(!FLAC__bitreader_read_rice_signed_block(decoder->private_->input, residual + sample, u, rice_parameter))
-				return false; /* read_callback_ sets the state for us */
+			u = (partition == 0) ? partition_samples - predictor_order : partition_samples;
+			if(!FLAC__bitreader_read_rice_signed_block(decoder->private_->input, residual + sample, u, rice_parameter)){
+				if(decoder->protected_->state == FLAC__STREAM_DECODER_READ_FRAME) {
+					/* state is unchanged, so read_callback_ did not report an
+					 * error: an invalid rice symbol was found */
+					send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC);
+					decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
+					return true;
+				}
+				else
+					return false; /* read_callback_ sets the state for us */
+			}
 			sample += u;
 		}
 		else {
 			if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &rice_parameter, FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_RAW_LEN))
 				return false; /* read_callback_ sets the state for us */
 			partitioned_rice_contents->raw_bits[partition] = rice_parameter;
-			for(u = (partition_order == 0 || partition > 0)? 0 : predictor_order; u < partition_samples; u++, sample++) {
-				if(!FLAC__bitreader_read_raw_int32(decoder->private_->input, &i, rice_parameter))
-					return false; /* read_callback_ sets the state for us */
-				residual[sample] = i;
+			if(rice_parameter == 0) {
+				for(u = (partition == 0)? predictor_order : 0; u < partition_samples; u++, sample++)
+					residual[sample] = 0;
+			}
+			else{
+				for(u = (partition == 0)? predictor_order : 0; u < partition_samples; u++, sample++) {
+					if(!FLAC__bitreader_read_raw_int32(decoder->private_->input, &i, rice_parameter))
+						return false; /* read_callback_ sets the state for us */
+					residual[sample] = i;
+				}
 			}
 		}
 	}
@@ -2787,10 +2975,12 @@
 		FLAC__uint32 zero = 0;
 		if(!FLAC__bitreader_read_raw_uint32(decoder->private_->input, &zero, FLAC__bitreader_bits_left_for_byte_alignment(decoder->private_->input)))
 			return false; /* read_callback_ sets the state for us */
+#ifndef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
 		if(zero != 0) {
 			send_error_to_client_(decoder, FLAC__STREAM_DECODER_ERROR_STATUS_LOST_SYNC);
 			decoder->protected_->state = FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC;
 		}
+#endif
 	}
 	return true;
 }
@@ -2875,6 +3065,66 @@
 	 */
 }
 
+#ifdef FUZZING_BUILD_MODE_NO_SANITIZE_SIGNED_INTEGER_OVERFLOW
+/* The attribute below is to silence the undefined sanitizer of oss-fuzz.
+ * Because fuzzing feeds bogus predictors and residual samples to the
+ * decoder, having overflows in this section is unavoidable. Also,
+ * because the calculated values are audio path only, there is no
+ * potential for security problems */
+__attribute__((no_sanitize("signed-integer-overflow")))
+#endif
+void undo_channel_coding(FLAC__StreamDecoder *decoder) {
+	uint32_t i;
+	switch(decoder->private_->frame.header.channel_assignment) {
+	case FLAC__CHANNEL_ASSIGNMENT_INDEPENDENT:
+		/* do nothing */
+		break;
+	case FLAC__CHANNEL_ASSIGNMENT_LEFT_SIDE:
+		FLAC__ASSERT(decoder->private_->frame.header.channels == 2);
+		FLAC__ASSERT(decoder->private_->side_subframe_in_use != /* logical XOR */ (decoder->private_->frame.header.bits_per_sample < 32));
+		for(i = 0; i < decoder->private_->frame.header.blocksize; i++)
+			if(decoder->private_->side_subframe_in_use)
+				decoder->private_->output[1][i] = decoder->private_->output[0][i] - decoder->private_->side_subframe[i];
+			else
+				decoder->private_->output[1][i] = decoder->private_->output[0][i] - decoder->private_->output[1][i];
+		break;
+	case FLAC__CHANNEL_ASSIGNMENT_RIGHT_SIDE:
+		FLAC__ASSERT(decoder->private_->frame.header.channels == 2);
+		FLAC__ASSERT(decoder->private_->side_subframe_in_use != /* logical XOR */ (decoder->private_->frame.header.bits_per_sample < 32));
+		for(i = 0; i < decoder->private_->frame.header.blocksize; i++)
+			if(decoder->private_->side_subframe_in_use)
+				decoder->private_->output[0][i] = decoder->private_->output[1][i] + decoder->private_->side_subframe[i];
+			else
+				decoder->private_->output[0][i] += decoder->private_->output[1][i];
+		break;
+	case FLAC__CHANNEL_ASSIGNMENT_MID_SIDE:
+		FLAC__ASSERT(decoder->private_->frame.header.channels == 2);
+		FLAC__ASSERT(decoder->private_->side_subframe_in_use != /* logical XOR */ (decoder->private_->frame.header.bits_per_sample < 32));
+		for(i = 0; i < decoder->private_->frame.header.blocksize; i++) {
+			if(!decoder->private_->side_subframe_in_use){
+				FLAC__int32 mid, side;
+				mid = decoder->private_->output[0][i];
+				side = decoder->private_->output[1][i];
+				mid = ((uint32_t) mid) << 1;
+				mid |= (side & 1); /* i.e. if 'side' is odd... */
+				decoder->private_->output[0][i] = (mid + side) >> 1;
+				decoder->private_->output[1][i] = (mid - side) >> 1;
+			}
+			else { /* bps == 32 */
+				FLAC__int64 mid;
+				mid = ((uint64_t)decoder->private_->output[0][i]) << 1;
+				mid |= (decoder->private_->side_subframe[i] & 1); /* i.e. if 'side' is odd... */
+				decoder->private_->output[0][i] = (mid + decoder->private_->side_subframe[i]) >> 1;
+				decoder->private_->output[1][i] = (mid - decoder->private_->side_subframe[i]) >> 1;
+			}
+		}
+		break;
+	default:
+		FLAC__ASSERT(0);
+		break;
+	}
+}
+
 #if FLAC__HAS_OGG
 FLAC__StreamDecoderReadStatus read_callback_ogg_aspect_(const FLAC__StreamDecoder *decoder, FLAC__byte buffer[], size_t *bytes)
 {
@@ -2923,6 +3173,8 @@
 
 FLAC__StreamDecoderWriteStatus write_audio_frame_to_client_(FLAC__StreamDecoder *decoder, const FLAC__Frame *frame, const FLAC__int32 * const buffer[])
 {
+	decoder->private_->last_frame = *frame; /* save the frame */
+	decoder->private_->last_frame_is_set = true;
 	if(decoder->private_->is_seeking) {
 		FLAC__uint64 this_frame_sample = frame->header.number.sample_number;
 		FLAC__uint64 next_frame_sample = this_frame_sample + (FLAC__uint64)frame->header.blocksize;
@@ -2933,14 +3185,13 @@
 #if FLAC__HAS_OGG
 		decoder->private_->got_a_frame = true;
 #endif
-		decoder->private_->last_frame = *frame; /* save the frame */
 		if(this_frame_sample <= target_sample && target_sample < next_frame_sample) { /* we hit our target frame */
-			unsigned delta = (unsigned)(target_sample - this_frame_sample);
+			uint32_t delta = (uint32_t)(target_sample - this_frame_sample);
 			/* kick out of seek mode */
 			decoder->private_->is_seeking = false;
 			/* shift out the samples before target_sample */
 			if(delta > 0) {
-				unsigned channel;
+				uint32_t channel;
 				const FLAC__int32 *newbuffer[FLAC__MAX_CHANNELS];
 				for(channel = 0; channel < frame->header.channels; channel++)
 					newbuffer[channel] = buffer[channel] + delta;
@@ -2986,16 +3237,16 @@
 	FLAC__uint64 first_frame_offset = decoder->private_->first_frame_offset, lower_bound, upper_bound, lower_bound_sample, upper_bound_sample, this_frame_sample;
 	FLAC__int64 pos = -1;
 	int i;
-	unsigned approx_bytes_per_frame;
-	FLAC__bool first_seek = true;
+	uint32_t approx_bytes_per_frame;
+	FLAC__bool first_seek = true, seek_from_lower_bound = false;
 	const FLAC__uint64 total_samples = FLAC__stream_decoder_get_total_samples(decoder);
-	const unsigned min_blocksize = decoder->private_->stream_info.data.stream_info.min_blocksize;
-	const unsigned max_blocksize = decoder->private_->stream_info.data.stream_info.max_blocksize;
-	const unsigned max_framesize = decoder->private_->stream_info.data.stream_info.max_framesize;
-	const unsigned min_framesize = decoder->private_->stream_info.data.stream_info.min_framesize;
+	const uint32_t min_blocksize = decoder->private_->stream_info.data.stream_info.min_blocksize;
+	const uint32_t max_blocksize = decoder->private_->stream_info.data.stream_info.max_blocksize;
+	const uint32_t max_framesize = decoder->private_->stream_info.data.stream_info.max_framesize;
+	const uint32_t min_framesize = decoder->private_->stream_info.data.stream_info.min_framesize;
 	/* take these from the current frame in case they've changed mid-stream */
-	unsigned channels = FLAC__stream_decoder_get_channels(decoder);
-	unsigned bps = FLAC__stream_decoder_get_bits_per_sample(decoder);
+	uint32_t channels = FLAC__stream_decoder_get_channels(decoder);
+	uint32_t bps = FLAC__stream_decoder_get_bits_per_sample(decoder);
 	const FLAC__StreamMetadata_SeekTable *seek_table = decoder->private_->has_seek_table? &decoder->private_->seek_table.data.seek_table : 0;
 
 	/* use values from stream info if we didn't decode a frame */
@@ -3014,7 +3265,7 @@
 	 * min_blocksize might be zero.
 	 */
 	else if(min_blocksize == max_blocksize && min_blocksize > 0) {
-		/* note there are no () around 'bps/8' to keep precision up since it's an integer calulation */
+		/* note there are no () around 'bps/8' to keep precision up since it's an integer calculation */
 		approx_bytes_per_frame = min_blocksize * channels * bps/8 + 64;
 	}
 	else
@@ -3022,22 +3273,36 @@
 
 	/*
 	 * First, we set an upper and lower bound on where in the
-	 * stream we will search.  For now we assume the worst case
-	 * scenario, which is our best guess at the beginning of
-	 * the first frame and end of the stream.
+	 * stream we will search.  For now we take the current position
+	 * as one bound and, depending on where the target position lies,
+	 * the beginning of the first frame or the end of the stream as
+	 * the other bound.
 	 */
 	lower_bound = first_frame_offset;
 	lower_bound_sample = 0;
 	upper_bound = stream_length;
 	upper_bound_sample = total_samples > 0 ? total_samples : target_sample /*estimate it*/;
 
+	if(decoder->protected_->state == FLAC__STREAM_DECODER_SEARCH_FOR_FRAME_SYNC &&
+	   decoder->private_->samples_decoded != 0) {
+		if(target_sample < decoder->private_->samples_decoded) {
+			if(FLAC__stream_decoder_get_decode_position(decoder, &upper_bound))
+				upper_bound_sample = decoder->private_->samples_decoded;
+		} else {
+			if(FLAC__stream_decoder_get_decode_position(decoder, &lower_bound))
+				lower_bound_sample = decoder->private_->samples_decoded;
+		}
+	}
+
 	/*
 	 * Now we refine the bounds if we have a seektable with
 	 * suitable points.  Note that according to the spec they
 	 * must be ordered by ascending sample number.
 	 *
 	 * Note: to protect against invalid seek tables we will ignore points
-	 * that have frame_samples==0 or sample_number>=total_samples
+	 * that have frame_samples==0 or sample_number>=total_samples. Also,
+	 * because math is limited to 64-bit ints, seekpoints with an offset
+	 * larger than 2^63 bytes (8 exbibytes) are rejected.
 	 */
 	if(seek_table) {
 		FLAC__uint64 new_lower_bound = lower_bound;
@@ -3098,20 +3363,34 @@
 
 	decoder->private_->target_sample = target_sample;
 	while(1) {
+		/* check whether decoder is still valid so bad state isn't overwritten
+		 * with seek error */
+		if(decoder->protected_->state == FLAC__STREAM_DECODER_MEMORY_ALLOCATION_ERROR ||
+		   decoder->protected_->state == FLAC__STREAM_DECODER_ABORTED)
+			return false;
 		/* check if the bounds are still ok */
-		if (lower_bound_sample >= upper_bound_sample || lower_bound > upper_bound) {
+		if (lower_bound_sample >= upper_bound_sample ||
+		    lower_bound > upper_bound ||
+		    upper_bound >= INT64_MAX) {
 			decoder->protected_->state = FLAC__STREAM_DECODER_SEEK_ERROR;
 			return false;
 		}
+		if(seek_from_lower_bound) {
+			pos = lower_bound;
+		}
+		else {
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
-		pos = (FLAC__int64)lower_bound + (FLAC__int64)((FLAC__double)(target_sample - lower_bound_sample) / (FLAC__double)(upper_bound_sample - lower_bound_sample) * (FLAC__double)(upper_bound - lower_bound)) - approx_bytes_per_frame;
+			pos = (FLAC__int64)lower_bound + (FLAC__int64)((double)(target_sample - lower_bound_sample) / (double)(upper_bound_sample - lower_bound_sample) * (double)(upper_bound - lower_bound)) - approx_bytes_per_frame;
 #else
-		/* a little less accurate: */
-		if(upper_bound - lower_bound < 0xffffffff)
-			pos = (FLAC__int64)lower_bound + (FLAC__int64)(((target_sample - lower_bound_sample) * (upper_bound - lower_bound)) / (upper_bound_sample - lower_bound_sample)) - approx_bytes_per_frame;
-		else /* @@@ WATCHOUT, ~2TB limit */
-			pos = (FLAC__int64)lower_bound + (FLAC__int64)((((target_sample - lower_bound_sample)>>8) * ((upper_bound - lower_bound)>>8)) / ((upper_bound_sample - lower_bound_sample)>>16)) - approx_bytes_per_frame;
+			/* a little less accurate: */
+			if(upper_bound - lower_bound < 0xffffffff)
+				pos = (FLAC__int64)lower_bound + (FLAC__int64)(((target_sample - lower_bound_sample) * (upper_bound - lower_bound)) / (upper_bound_sample - lower_bound_sample)) - approx_bytes_per_frame;
+			else { /* @@@ WATCHOUT, ~2TB limit */
+			        FLAC__uint64 ratio = (1<<16) / (upper_bound_sample - lower_bound_sample);
+				pos = (FLAC__int64)lower_bound + (FLAC__int64)((((target_sample - lower_bound_sample)>>8) * ((upper_bound - lower_bound)>>8) * ratio)) - approx_bytes_per_frame;
+			}
 #endif
+		}
 		if(pos >= (FLAC__int64)upper_bound)
 			pos = (FLAC__int64)upper_bound - 1;
 		if(pos < (FLAC__int64)lower_bound)
@@ -3131,24 +3410,32 @@
 		 * FLAC__stream_decoder_process_single() to return false.
 		 */
 		decoder->private_->unparseable_frame_count = 0;
-		if(!FLAC__stream_decoder_process_single(decoder)) {
-			decoder->protected_->state = FLAC__STREAM_DECODER_SEEK_ERROR;
-			return false;
+		if(!FLAC__stream_decoder_process_single(decoder) || decoder->protected_->state == FLAC__STREAM_DECODER_ABORTED || 0 == decoder->private_->samples_decoded) {
+			/* No frame could be decoded */
+			if(decoder->protected_->state != FLAC__STREAM_DECODER_ABORTED && decoder->private_->eof_callback(decoder, decoder->private_->client_data) && !seek_from_lower_bound){
+				/* decoder has hit end of stream while processing corrupt
+				 * frame. To remedy this, try decoding a frame at the lower
+				 * bound so the seek after that hopefully ends up somewhere
+				 * else */
+				seek_from_lower_bound = true;
+				continue;
+			}
+			else {
+				decoder->protected_->state = FLAC__STREAM_DECODER_SEEK_ERROR;
+				return false;
+			}
 		}
+		seek_from_lower_bound = false;
+
 		/* our write callback will change the state when it gets to the target frame */
 		/* actually, we could have got_a_frame if our decoder is at FLAC__STREAM_DECODER_END_OF_STREAM so we need to check for that also */
-#if 0
-		/*@@@@@@ used to be the following; not clear if the check for end of stream is needed anymore */
-		if(decoder->protected_->state != FLAC__SEEKABLE_STREAM_DECODER_SEEKING && decoder->protected_->state != FLAC__STREAM_DECODER_END_OF_STREAM)
-			break;
-#endif
 		if(!decoder->private_->is_seeking)
 			break;
 
 		FLAC__ASSERT(decoder->private_->last_frame.header.number_type == FLAC__FRAME_NUMBER_TYPE_SAMPLE_NUMBER);
 		this_frame_sample = decoder->private_->last_frame.header.number.sample_number;
 
-		if (0 == decoder->private_->samples_decoded || (this_frame_sample + decoder->private_->last_frame.header.blocksize >= upper_bound_sample && !first_seek)) {
+		if(this_frame_sample + decoder->private_->last_frame.header.blocksize >= upper_bound_sample && !first_seek) {
 			if (pos == (FLAC__int64)lower_bound) {
 				/* can't move back any more than the first frame, something is fatally wrong */
 				decoder->protected_->state = FLAC__STREAM_DECODER_SEEK_ERROR;
@@ -3175,7 +3462,7 @@
 				decoder->protected_->state = FLAC__STREAM_DECODER_SEEK_ERROR;
 				return false;
 			}
-			approx_bytes_per_frame = (unsigned)(2 * (upper_bound - pos) / 3 + 16);
+			approx_bytes_per_frame = (uint32_t)(2 * (upper_bound - pos) / 3 + 16);
 		}
 		else { /* target_sample >= this_frame_sample + this frame's blocksize */
 			lower_bound_sample = this_frame_sample + decoder->private_->last_frame.header.blocksize;
@@ -3183,7 +3470,7 @@
 				decoder->protected_->state = FLAC__STREAM_DECODER_SEEK_ERROR;
 				return false;
 			}
-			approx_bytes_per_frame = (unsigned)(2 * (lower_bound - pos) / 3 + 16);
+			approx_bytes_per_frame = (uint32_t)(2 * (lower_bound - pos) / 3 + 16);
 		}
 	}
 
@@ -3198,14 +3485,14 @@
 	FLAC__uint64 this_frame_sample = (FLAC__uint64)0 - 1;
 	FLAC__uint64 pos = 0; /* only initialized to avoid compiler warning */
 	FLAC__bool did_a_seek;
-	unsigned iteration = 0;
+	uint32_t iteration = 0;
 
 	/* In the first iterations, we will calculate the target byte position
 	 * by the distance from the target sample to left_sample and
 	 * right_sample (let's call it "proportional search").  After that, we
 	 * will switch to binary search.
 	 */
-	unsigned BINARY_SEARCH_AFTER_ITERATION = 2;
+	uint32_t BINARY_SEARCH_AFTER_ITERATION = 2;
 
 	/* We will switch to a linear search once our current sample is less
 	 * than this number of samples ahead of the target sample
@@ -3222,13 +3509,19 @@
 
 	decoder->private_->target_sample = target_sample;
 	for( ; ; iteration++) {
+		/* Do sanity checks on bounds */
+		if(right_pos <= left_pos || right_pos - left_pos < 9) {
+			/* A FLAC frame is at least 9 bytes in size */
+			decoder->protected_->state = FLAC__STREAM_DECODER_SEEK_ERROR;
+			return false;
+		}
 		if (iteration == 0 || this_frame_sample > target_sample || target_sample - this_frame_sample > LINEAR_SEARCH_WITHIN_SAMPLES) {
 			if (iteration >= BINARY_SEARCH_AFTER_ITERATION) {
 				pos = (right_pos + left_pos) / 2;
 			}
 			else {
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
-				pos = (FLAC__uint64)((FLAC__double)(target_sample - left_sample) / (FLAC__double)(right_sample - left_sample) * (FLAC__double)(right_pos - left_pos));
+				pos = (FLAC__uint64)((double)(target_sample - left_sample) / (double)(right_sample - left_sample) * (double)(right_pos - left_pos));
 #else
 				/* a little less accurate: */
 				if ((target_sample-left_sample <= 0xffffffff) && (right_pos-left_pos <= 0xffffffff))
@@ -3258,7 +3551,8 @@
 			did_a_seek = false;
 
 		decoder->private_->got_a_frame = false;
-		if(!FLAC__stream_decoder_process_single(decoder)) {
+		if(!FLAC__stream_decoder_process_single(decoder) ||
+		   decoder->protected_->state == FLAC__STREAM_DECODER_ABORTED) {
 			decoder->protected_->state = FLAC__STREAM_DECODER_SEEK_ERROR;
 			return false;
 		}
@@ -3305,7 +3599,7 @@
 					}
 					left_pos = pos;
 				}
-				else if(this_frame_sample > target_sample) {
+				else {
 					right_sample = this_frame_sample;
 					/* sanity check to avoid infinite loop */
 					if (right_pos == pos) {
@@ -3373,7 +3667,13 @@
 
 	if(decoder->private_->file == stdin)
 		return FLAC__STREAM_DECODER_LENGTH_STATUS_UNSUPPORTED;
-	else if(flac_fstat(fileno(decoder->private_->file), &filestats) != 0)
+
+#ifndef FLAC__USE_FILELENGTHI64
+	if(flac_fstat(fileno(decoder->private_->file), &filestats) != 0)
+#else
+	filestats.st_size = _filelengthi64(fileno(decoder->private_->file));
+	if(filestats.st_size < 0)
+#endif
 		return FLAC__STREAM_DECODER_LENGTH_STATUS_ERROR;
 	else {
 		*stream_length = (FLAC__uint64)filestats.st_size;

diff --git a/src/libFLAC/stream_encoder.c b/src/libFLAC/stream_encoder.c
index 45bdb25..4d5beaf 100644
--- a/src/libFLAC/stream_encoder.c
+++ b/src/libFLAC/stream_encoder.c

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -39,6 +39,10 @@
 #include <stdlib.h> /* for malloc() */
 #include <string.h> /* for memcpy() */
 #include <sys/types.h> /* for off_t */
+#ifdef _WIN32
+#include <windows.h> /* for GetFileType() */
+#include <io.h> /* for _get_osfhandle() */
+#endif
 #include "share/compat.h"
 #include "FLAC/assert.h"
 #include "FLAC/stream_decoder.h"
@@ -79,14 +83,14 @@
 
 typedef struct {
 	FLAC__int32 *data[FLAC__MAX_CHANNELS];
-	unsigned size; /* of each data[] in samples */
-	unsigned tail;
+	uint32_t size; /* of each data[] in samples */
+	uint32_t tail;
 } verify_input_fifo;
 
 typedef struct {
 	const FLAC__byte *data;
-	unsigned capacity;
-	unsigned bytes;
+	uint32_t capacity;
+	uint32_t bytes;
 } verify_output;
 
 typedef enum {
@@ -95,17 +99,17 @@
 	ENCODER_IN_AUDIO = 2
 } EncoderStateHint;
 
-static struct CompressionLevels {
+static const  struct CompressionLevels {
 	FLAC__bool do_mid_side_stereo;
 	FLAC__bool loose_mid_side_stereo;
-	unsigned max_lpc_order;
-	unsigned qlp_coeff_precision;
+	uint32_t max_lpc_order;
+	uint32_t qlp_coeff_precision;
 	FLAC__bool do_qlp_coeff_prec_search;
 	FLAC__bool do_escape_coding;
 	FLAC__bool do_exhaustive_model_search;
-	unsigned min_residual_partition_order;
-	unsigned max_residual_partition_order;
-	unsigned rice_parameter_search_dist;
+	uint32_t min_residual_partition_order;
+	uint32_t max_residual_partition_order;
+	uint32_t rice_parameter_search_dist;
 	const char *apodization;
 } compression_levels_[] = {
 	{ false, false,  0, 0, false, false, false, 0, 3, 0, "tukey(5e-1)" },
@@ -114,9 +118,9 @@
 	{ false, false,  6, 0, false, false, false, 0, 4, 0, "tukey(5e-1)" },
 	{ true , true ,  8, 0, false, false, false, 0, 4, 0, "tukey(5e-1)" },
 	{ true , false,  8, 0, false, false, false, 0, 5, 0, "tukey(5e-1)" },
-	{ true , false,  8, 0, false, false, false, 0, 6, 0, "tukey(5e-1);partial_tukey(2)" },
-	{ true , false, 12, 0, false, false, false, 0, 6, 0, "tukey(5e-1);partial_tukey(2)" },
-	{ true , false, 12, 0, false, false, false, 0, 6, 0, "tukey(5e-1);partial_tukey(2);punchout_tukey(3)" }
+	{ true , false,  8, 0, false, false, false, 0, 6, 0, "subdivide_tukey(2)" },
+	{ true , false, 12, 0, false, false, false, 0, 6, 0, "subdivide_tukey(2)" },
+	{ true , false, 12, 0, false, false, false, 0, 6, 0, "subdivide_tukey(3)" }
 	/* here we use locale-independent 5e-1 instead of 0.5 or 0,5 */
 };
 
@@ -129,130 +133,127 @@
 
 static void set_defaults_(FLAC__StreamEncoder *encoder);
 static void free_(FLAC__StreamEncoder *encoder);
-static FLAC__bool resize_buffers_(FLAC__StreamEncoder *encoder, unsigned new_blocksize);
-static FLAC__bool write_bitbuffer_(FLAC__StreamEncoder *encoder, unsigned samples, FLAC__bool is_last_block);
-static FLAC__StreamEncoderWriteStatus write_frame_(FLAC__StreamEncoder *encoder, const FLAC__byte buffer[], size_t bytes, unsigned samples, FLAC__bool is_last_block);
+static FLAC__bool resize_buffers_(FLAC__StreamEncoder *encoder, uint32_t new_blocksize);
+static FLAC__bool write_bitbuffer_(FLAC__StreamEncoder *encoder, uint32_t samples, FLAC__bool is_last_block);
+static FLAC__StreamEncoderWriteStatus write_frame_(FLAC__StreamEncoder *encoder, const FLAC__byte buffer[], size_t bytes, uint32_t samples, FLAC__bool is_last_block);
 static void update_metadata_(const FLAC__StreamEncoder *encoder);
 #if FLAC__HAS_OGG
 static void update_ogg_metadata_(FLAC__StreamEncoder *encoder);
 #endif
-static FLAC__bool process_frame_(FLAC__StreamEncoder *encoder, FLAC__bool is_fractional_block, FLAC__bool is_last_block);
-static FLAC__bool process_subframes_(FLAC__StreamEncoder *encoder, FLAC__bool is_fractional_block);
+static FLAC__bool process_frame_(FLAC__StreamEncoder *encoder, FLAC__bool is_last_block);
+static FLAC__bool process_subframes_(FLAC__StreamEncoder *encoder);
 
 static FLAC__bool process_subframe_(
 	FLAC__StreamEncoder *encoder,
-	unsigned min_partition_order,
-	unsigned max_partition_order,
+	uint32_t min_partition_order,
+	uint32_t max_partition_order,
 	const FLAC__FrameHeader *frame_header,
-	unsigned subframe_bps,
-	const FLAC__int32 integer_signal[],
+	uint32_t subframe_bps,
+	const void *integer_signal,
 	FLAC__Subframe *subframe[2],
 	FLAC__EntropyCodingMethod_PartitionedRiceContents *partitioned_rice_contents[2],
 	FLAC__int32 *residual[2],
-	unsigned *best_subframe,
-	unsigned *best_bits
+	uint32_t *best_subframe,
+	uint32_t *best_bits
 );
 
 static FLAC__bool add_subframe_(
 	FLAC__StreamEncoder *encoder,
-	unsigned blocksize,
-	unsigned subframe_bps,
+	uint32_t blocksize,
+	uint32_t subframe_bps,
 	const FLAC__Subframe *subframe,
 	FLAC__BitWriter *frame
 );
 
-static unsigned evaluate_constant_subframe_(
+static uint32_t evaluate_constant_subframe_(
 	FLAC__StreamEncoder *encoder,
-	const FLAC__int32 signal,
-	unsigned blocksize,
-	unsigned subframe_bps,
+	const FLAC__int64 signal,
+	uint32_t blocksize,
+	uint32_t subframe_bps,
 	FLAC__Subframe *subframe
 );
 
-static unsigned evaluate_fixed_subframe_(
+static uint32_t evaluate_fixed_subframe_(
 	FLAC__StreamEncoder *encoder,
-	const FLAC__int32 signal[],
+	const void *signal,
 	FLAC__int32 residual[],
 	FLAC__uint64 abs_residual_partition_sums[],
-	unsigned raw_bits_per_partition[],
-	unsigned blocksize,
-	unsigned subframe_bps,
-	unsigned order,
-	unsigned rice_parameter,
-	unsigned rice_parameter_limit,
-	unsigned min_partition_order,
-	unsigned max_partition_order,
+	uint32_t raw_bits_per_partition[],
+	uint32_t blocksize,
+	uint32_t subframe_bps,
+	uint32_t order,
+	uint32_t rice_parameter_limit,
+	uint32_t min_partition_order,
+	uint32_t max_partition_order,
 	FLAC__bool do_escape_coding,
-	unsigned rice_parameter_search_dist,
+	uint32_t rice_parameter_search_dist,
 	FLAC__Subframe *subframe,
 	FLAC__EntropyCodingMethod_PartitionedRiceContents *partitioned_rice_contents
 );
 
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
-static unsigned evaluate_lpc_subframe_(
+static uint32_t evaluate_lpc_subframe_(
 	FLAC__StreamEncoder *encoder,
-	const FLAC__int32 signal[],
+	const void *signal,
 	FLAC__int32 residual[],
 	FLAC__uint64 abs_residual_partition_sums[],
-	unsigned raw_bits_per_partition[],
+	uint32_t raw_bits_per_partition[],
 	const FLAC__real lp_coeff[],
-	unsigned blocksize,
-	unsigned subframe_bps,
-	unsigned order,
-	unsigned qlp_coeff_precision,
-	unsigned rice_parameter,
-	unsigned rice_parameter_limit,
-	unsigned min_partition_order,
-	unsigned max_partition_order,
+	uint32_t blocksize,
+	uint32_t subframe_bps,
+	uint32_t order,
+	uint32_t qlp_coeff_precision,
+	uint32_t rice_parameter_limit,
+	uint32_t min_partition_order,
+	uint32_t max_partition_order,
 	FLAC__bool do_escape_coding,
-	unsigned rice_parameter_search_dist,
+	uint32_t rice_parameter_search_dist,
 	FLAC__Subframe *subframe,
 	FLAC__EntropyCodingMethod_PartitionedRiceContents *partitioned_rice_contents
 );
 #endif
 
-static unsigned evaluate_verbatim_subframe_(
+static uint32_t evaluate_verbatim_subframe_(
 	FLAC__StreamEncoder *encoder,
-	const FLAC__int32 signal[],
-	unsigned blocksize,
-	unsigned subframe_bps,
+	const void *signal,
+	uint32_t blocksize,
+	uint32_t subframe_bps,
 	FLAC__Subframe *subframe
 );
 
-static unsigned find_best_partition_order_(
+static uint32_t find_best_partition_order_(
 	struct FLAC__StreamEncoderPrivate *private_,
 	const FLAC__int32 residual[],
 	FLAC__uint64 abs_residual_partition_sums[],
-	unsigned raw_bits_per_partition[],
-	unsigned residual_samples,
-	unsigned predictor_order,
-	unsigned rice_parameter,
-	unsigned rice_parameter_limit,
-	unsigned min_partition_order,
-	unsigned max_partition_order,
-	unsigned bps,
+	uint32_t raw_bits_per_partition[],
+	uint32_t residual_samples,
+	uint32_t predictor_order,
+	uint32_t rice_parameter_limit,
+	uint32_t min_partition_order,
+	uint32_t max_partition_order,
+	uint32_t bps,
 	FLAC__bool do_escape_coding,
-	unsigned rice_parameter_search_dist,
+	uint32_t rice_parameter_search_dist,
 	FLAC__EntropyCodingMethod *best_ecm
 );
 
 static void precompute_partition_info_sums_(
 	const FLAC__int32 residual[],
 	FLAC__uint64 abs_residual_partition_sums[],
-	unsigned residual_samples,
-	unsigned predictor_order,
-	unsigned min_partition_order,
-	unsigned max_partition_order,
-	unsigned bps
+	uint32_t residual_samples,
+	uint32_t predictor_order,
+	uint32_t min_partition_order,
+	uint32_t max_partition_order,
+	uint32_t bps
 );
 
 static void precompute_partition_info_escapes_(
 	const FLAC__int32 residual[],
-	unsigned raw_bits_per_partition[],
-	unsigned residual_samples,
-	unsigned predictor_order,
-	unsigned min_partition_order,
-	unsigned max_partition_order
+	uint32_t raw_bits_per_partition[],
+	uint32_t residual_samples,
+	uint32_t predictor_order,
+	uint32_t min_partition_order,
+	uint32_t max_partition_order
 );
 
 static FLAC__bool set_partitioned_rice_(
@@ -260,35 +261,35 @@
 	const FLAC__int32 residual[],
 #endif
 	const FLAC__uint64 abs_residual_partition_sums[],
-	const unsigned raw_bits_per_partition[],
-	const unsigned residual_samples,
-	const unsigned predictor_order,
-	const unsigned suggested_rice_parameter,
-	const unsigned rice_parameter_limit,
-	const unsigned rice_parameter_search_dist,
-	const unsigned partition_order,
+	const uint32_t raw_bits_per_partition[],
+	const uint32_t residual_samples,
+	const uint32_t predictor_order,
+	const uint32_t rice_parameter_limit,
+	const uint32_t rice_parameter_search_dist,
+	const uint32_t partition_order,
 	const FLAC__bool search_for_escapes,
 	FLAC__EntropyCodingMethod_PartitionedRiceContents *partitioned_rice_contents,
-	unsigned *bits
+	uint32_t *bits
 );
 
-static unsigned get_wasted_bits_(FLAC__int32 signal[], unsigned samples);
+static uint32_t get_wasted_bits_(FLAC__int32 signal[], uint32_t samples);
+static uint32_t get_wasted_bits_wide_(FLAC__int64 signal_wide[], FLAC__int32 signal[], uint32_t samples);
 
 /* verify-related routines: */
 static void append_to_verify_fifo_(
 	verify_input_fifo *fifo,
 	const FLAC__int32 * const input[],
-	unsigned input_offset,
-	unsigned channels,
-	unsigned wide_samples
+	uint32_t input_offset,
+	uint32_t channels,
+	uint32_t wide_samples
 );
 
 static void append_to_verify_fifo_interleaved_(
 	verify_input_fifo *fifo,
 	const FLAC__int32 input[],
-	unsigned input_offset,
-	unsigned channels,
-	unsigned wide_samples
+	uint32_t input_offset,
+	uint32_t channels,
+	uint32_t wide_samples
 );
 
 static FLAC__StreamDecoderReadStatus verify_read_callback_(const FLAC__StreamDecoder *decoder, FLAC__byte buffer[], size_t *bytes, void *client_data);
@@ -299,7 +300,7 @@
 static FLAC__StreamEncoderReadStatus file_read_callback_(const FLAC__StreamEncoder *encoder, FLAC__byte buffer[], size_t *bytes, void *client_data);
 static FLAC__StreamEncoderSeekStatus file_seek_callback_(const FLAC__StreamEncoder *encoder, FLAC__uint64 absolute_byte_offset, void *client_data);
 static FLAC__StreamEncoderTellStatus file_tell_callback_(const FLAC__StreamEncoder *encoder, FLAC__uint64 *absolute_byte_offset, void *client_data);
-static FLAC__StreamEncoderWriteStatus file_write_callback_(const FLAC__StreamEncoder *encoder, const FLAC__byte buffer[], size_t bytes, unsigned samples, unsigned current_frame, void *client_data);
+static FLAC__StreamEncoderWriteStatus file_write_callback_(const FLAC__StreamEncoder *encoder, const FLAC__byte buffer[], size_t bytes, uint32_t samples, uint32_t current_frame, void *client_data);
 static FILE *get_binary_stdout_(void);
 
 
@@ -310,17 +311,18 @@
  ***********************************************************************/
 
 typedef struct FLAC__StreamEncoderPrivate {
-	unsigned input_capacity;                          /* current size (in samples) of the signal and residual buffers */
+	uint32_t input_capacity;                          /* current size (in samples) of the signal and residual buffers */
 	FLAC__int32 *integer_signal[FLAC__MAX_CHANNELS];  /* the integer version of the input signal */
 	FLAC__int32 *integer_signal_mid_side[2];          /* the integer version of the mid-side input signal (stereo only) */
+	FLAC__int64 *integer_signal_33bit_side;           /* 33-bit side for 32-bit stereo decorrelation */
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
 	FLAC__real *real_signal[FLAC__MAX_CHANNELS];      /* (@@@ currently unused) the floating-point version of the input signal */
 	FLAC__real *real_signal_mid_side[2];              /* (@@@ currently unused) the floating-point version of the mid-side input signal (stereo only) */
 	FLAC__real *window[FLAC__MAX_APODIZATION_FUNCTIONS]; /* the pre-computed floating-point window for each apodization function */
 	FLAC__real *windowed_signal;                      /* the integer_signal[] * current window[] */
 #endif
-	unsigned subframe_bps[FLAC__MAX_CHANNELS];        /* the effective bits per sample of the input signal (stream bps - wasted bits) */
-	unsigned subframe_bps_mid_side[2];                /* the effective bits per sample of the mid-side input signal (stream bps - wasted bits + 0/1) */
+	uint32_t subframe_bps[FLAC__MAX_CHANNELS];        /* the effective bits per sample of the input signal (stream bps - wasted bits) */
+	uint32_t subframe_bps_mid_side[2];                /* the effective bits per sample of the mid-side input signal (stream bps - wasted bits + 0/1) */
 	FLAC__int32 *residual_workspace[FLAC__MAX_CHANNELS][2]; /* each channel has a candidate and best workspace where the subframe residual signals will be stored */
 	FLAC__int32 *residual_workspace_mid_side[2][2];
 	FLAC__Subframe subframe_workspace[FLAC__MAX_CHANNELS][2];
@@ -331,45 +333,46 @@
 	FLAC__EntropyCodingMethod_PartitionedRiceContents partitioned_rice_contents_workspace_mid_side[FLAC__MAX_CHANNELS][2];
 	FLAC__EntropyCodingMethod_PartitionedRiceContents *partitioned_rice_contents_workspace_ptr[FLAC__MAX_CHANNELS][2];
 	FLAC__EntropyCodingMethod_PartitionedRiceContents *partitioned_rice_contents_workspace_ptr_mid_side[FLAC__MAX_CHANNELS][2];
-	unsigned best_subframe[FLAC__MAX_CHANNELS];       /* index (0 or 1) into 2nd dimension of the above workspaces */
-	unsigned best_subframe_mid_side[2];
-	unsigned best_subframe_bits[FLAC__MAX_CHANNELS];  /* size in bits of the best subframe for each channel */
-	unsigned best_subframe_bits_mid_side[2];
+	uint32_t best_subframe[FLAC__MAX_CHANNELS];       /* index (0 or 1) into 2nd dimension of the above workspaces */
+	uint32_t best_subframe_mid_side[2];
+	uint32_t best_subframe_bits[FLAC__MAX_CHANNELS];  /* size in bits of the best subframe for each channel */
+	uint32_t best_subframe_bits_mid_side[2];
 	FLAC__uint64 *abs_residual_partition_sums;        /* workspace where the sum of abs(candidate residual) for each partition is stored */
-	unsigned *raw_bits_per_partition;                 /* workspace where the sum of silog2(candidate residual) for each partition is stored */
+	uint32_t *raw_bits_per_partition;                 /* workspace where the sum of silog2(candidate residual) for each partition is stored */
 	FLAC__BitWriter *frame;                           /* the current frame being worked on */
-	unsigned loose_mid_side_stereo_frames;            /* rounded number of frames the encoder will use before trying both independent and mid/side frames again */
-	unsigned loose_mid_side_stereo_frame_count;       /* number of frames using the current channel assignment */
+	uint32_t loose_mid_side_stereo_frames;            /* rounded number of frames the encoder will use before trying both independent and mid/side frames again */
+	uint32_t loose_mid_side_stereo_frame_count;       /* number of frames using the current channel assignment */
 	FLAC__ChannelAssignment last_channel_assignment;
 	FLAC__StreamMetadata streaminfo;                  /* scratchpad for STREAMINFO as it is built */
 	FLAC__StreamMetadata_SeekTable *seek_table;       /* pointer into encoder->protected_->metadata_ where the seek table is */
-	unsigned current_sample_number;
-	unsigned current_frame_number;
+	uint32_t current_sample_number;
+	uint32_t current_frame_number;
 	FLAC__MD5Context md5context;
 	FLAC__CPUInfo cpuinfo;
-	void (*local_precompute_partition_info_sums)(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[], unsigned residual_samples, unsigned predictor_order, unsigned min_partition_order, unsigned max_partition_order, unsigned bps);
+	void (*local_precompute_partition_info_sums)(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[], uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t bps);
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
-	unsigned (*local_fixed_compute_best_predictor)(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
-	unsigned (*local_fixed_compute_best_predictor_wide)(const FLAC__int32 data[], unsigned data_len, FLAC__float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
+	uint32_t (*local_fixed_compute_best_predictor)(const FLAC__int32 data[], uint32_t data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
+	uint32_t (*local_fixed_compute_best_predictor_wide)(const FLAC__int32 data[], uint32_t data_len, float residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
 #else
-	unsigned (*local_fixed_compute_best_predictor)(const FLAC__int32 data[], unsigned data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
-	unsigned (*local_fixed_compute_best_predictor_wide)(const FLAC__int32 data[], unsigned data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
+	uint32_t (*local_fixed_compute_best_predictor)(const FLAC__int32 data[], uint32_t data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
+	uint32_t (*local_fixed_compute_best_predictor_wide)(const FLAC__int32 data[], uint32_t data_len, FLAC__fixedpoint residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1]);
 #endif
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
-	void (*local_lpc_compute_autocorrelation)(const FLAC__real data[], unsigned data_len, unsigned lag, FLAC__real autoc[]);
-	void (*local_lpc_compute_residual_from_qlp_coefficients)(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
-	void (*local_lpc_compute_residual_from_qlp_coefficients_64bit)(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
-	void (*local_lpc_compute_residual_from_qlp_coefficients_16bit)(const FLAC__int32 *data, unsigned data_len, const FLAC__int32 qlp_coeff[], unsigned order, int lp_quantization, FLAC__int32 residual[]);
+	void (*local_lpc_compute_autocorrelation)(const FLAC__real data[], uint32_t data_len, uint32_t lag, double autoc[]);
+	void (*local_lpc_compute_residual_from_qlp_coefficients)(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[]);
+	void (*local_lpc_compute_residual_from_qlp_coefficients_64bit)(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[]);
+	void (*local_lpc_compute_residual_from_qlp_coefficients_16bit)(const FLAC__int32 *data, uint32_t data_len, const FLAC__int32 qlp_coeff[], uint32_t order, int lp_quantization, FLAC__int32 residual[]);
 #endif
-	FLAC__bool use_wide_by_block;          /* use slow 64-bit versions of some functions because of the block size */
-	FLAC__bool use_wide_by_partition;      /* use slow 64-bit versions of some functions because of the min partition order and blocksize */
-	FLAC__bool use_wide_by_order;          /* use slow 64-bit versions of some functions because of the lpc order */
+	FLAC__bool disable_mmx;
+	FLAC__bool disable_sse2;
+	FLAC__bool disable_ssse3;
+	FLAC__bool disable_sse41;
+	FLAC__bool disable_avx2;
+	FLAC__bool disable_fma;
 	FLAC__bool disable_constant_subframes;
 	FLAC__bool disable_fixed_subframes;
 	FLAC__bool disable_verbatim_subframes;
-#if FLAC__HAS_OGG
 	FLAC__bool is_ogg;
-#endif
 	FLAC__StreamEncoderReadCallback read_callback; /* currently only needed for Ogg FLAC */
 	FLAC__StreamEncoderSeekCallback seek_callback;
 	FLAC__StreamEncoderTellCallback tell_callback;
@@ -377,15 +380,16 @@
 	FLAC__StreamEncoderMetadataCallback metadata_callback;
 	FLAC__StreamEncoderProgressCallback progress_callback;
 	void *client_data;
-	unsigned first_seekpoint_to_check;
+	uint32_t first_seekpoint_to_check;
 	FILE *file;                            /* only used when encoding to a file */
 	FLAC__uint64 bytes_written;
 	FLAC__uint64 samples_written;
-	unsigned frames_written;
-	unsigned total_frames_estimate;
+	uint32_t frames_written;
+	uint32_t total_frames_estimate;
 	/* unaligned (original) pointers to allocated data */
 	FLAC__int32 *integer_signal_unaligned[FLAC__MAX_CHANNELS];
 	FLAC__int32 *integer_signal_mid_side_unaligned[2];
+	FLAC__int64 *integer_signal_33bit_side_unaligned;
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
 	FLAC__real *real_signal_unaligned[FLAC__MAX_CHANNELS]; /* (@@@ currently unused) */
 	FLAC__real *real_signal_mid_side_unaligned[2]; /* (@@@ currently unused) */
@@ -395,7 +399,7 @@
 	FLAC__int32 *residual_workspace_unaligned[FLAC__MAX_CHANNELS][2];
 	FLAC__int32 *residual_workspace_mid_side_unaligned[2][2];
 	FLAC__uint64 *abs_residual_partition_sums_unaligned;
-	unsigned *raw_bits_per_partition_unaligned;
+	uint32_t *raw_bits_per_partition_unaligned;
 	/*
 	 * These fields have been moved here from private function local
 	 * declarations merely to save stack space during encoding.
@@ -415,9 +419,9 @@
 		verify_output output;
 		struct {
 			FLAC__uint64 absolute_sample;
-			unsigned frame_number;
-			unsigned channel;
-			unsigned sample;
+			uint32_t frame_number;
+			uint32_t channel;
+			uint32_t sample;
 			FLAC__int32 expected;
 			FLAC__int32 got;
 		} error_stats;
@@ -495,7 +499,7 @@
  * WATCHOUT: some parts of the code assert that OVERREAD_ == 1 and there's
  * not really any reason to change it.
  */
-static const unsigned OVERREAD_ = 1;
+static const uint32_t OVERREAD_ = 1;
 
 /***********************************************************************
  *
@@ -505,7 +509,7 @@
 FLAC_API FLAC__StreamEncoder *FLAC__stream_encoder_new(void)
 {
 	FLAC__StreamEncoder *encoder;
-	unsigned i;
+	uint32_t i;
 
 	FLAC__ASSERT(sizeof(int) >= 4); /* we want to die right away if this is not true */
 
@@ -537,6 +541,8 @@
 
 	encoder->private_->file = 0;
 
+	encoder->protected_->state = FLAC__STREAM_ENCODER_UNINITIALIZED;
+
 	set_defaults_(encoder);
 
 	encoder->private_->is_being_deleted = false;
@@ -569,14 +575,12 @@
 	for(i = 0; i < 2; i++)
 		FLAC__format_entropy_coding_method_partitioned_rice_contents_init(&encoder->private_->partitioned_rice_contents_extra[i]);
 
-	encoder->protected_->state = FLAC__STREAM_ENCODER_UNINITIALIZED;
-
 	return encoder;
 }
 
 FLAC_API void FLAC__stream_encoder_delete(FLAC__StreamEncoder *encoder)
 {
-	unsigned i;
+	uint32_t i;
 
 	if (encoder == NULL)
 		return ;
@@ -626,7 +630,7 @@
 	FLAC__bool is_ogg
 )
 {
-	unsigned i;
+	uint32_t i;
 	FLAC__bool metadata_has_seektable, metadata_has_vorbis_comment, metadata_picture_has_type1, metadata_picture_has_type2;
 
 	FLAC__ASSERT(0 != encoder);
@@ -634,10 +638,8 @@
 	if(encoder->protected_->state != FLAC__STREAM_ENCODER_UNINITIALIZED)
 		return FLAC__STREAM_ENCODER_INIT_STATUS_ALREADY_INITIALIZED;
 
-#if !FLAC__HAS_OGG
-	if(is_ogg)
+	if(FLAC__HAS_OGG == 0 && is_ogg)
 		return FLAC__STREAM_ENCODER_INIT_STATUS_UNSUPPORTED_CONTAINER;
-#endif
 
 	if(0 == write_callback || (seek_callback && 0 == tell_callback))
 		return FLAC__STREAM_ENCODER_INIT_STATUS_INVALID_CALLBACKS;
@@ -652,10 +654,7 @@
 	else if(!encoder->protected_->do_mid_side_stereo)
 		encoder->protected_->loose_mid_side_stereo = false;
 
-	if(encoder->protected_->bits_per_sample >= 32)
-		encoder->protected_->do_mid_side_stereo = false; /* since we currenty do 32-bit math, the side channel would have 33 bps and overflow */
-
-	if(encoder->protected_->bits_per_sample < FLAC__MIN_BITS_PER_SAMPLE || encoder->protected_->bits_per_sample > FLAC__REFERENCE_CODEC_MAX_BITS_PER_SAMPLE)
+	if(encoder->protected_->bits_per_sample < FLAC__MIN_BITS_PER_SAMPLE || encoder->protected_->bits_per_sample > FLAC__MAX_BITS_PER_SAMPLE)
 		return FLAC__STREAM_ENCODER_INIT_STATUS_INVALID_BITS_PER_SAMPLE;
 
 	if(!FLAC__format_sample_rate_is_valid(encoder->protected_->sample_rate))
@@ -722,7 +721,8 @@
 			encoder->protected_->bits_per_sample != 12 &&
 			encoder->protected_->bits_per_sample != 16 &&
 			encoder->protected_->bits_per_sample != 20 &&
-			encoder->protected_->bits_per_sample != 24
+			encoder->protected_->bits_per_sample != 24 &&
+			encoder->protected_->bits_per_sample != 32
 		)
 			return FLAC__STREAM_ENCODER_INIT_STATUS_NOT_STREAMABLE;
 		if(encoder->protected_->max_residual_partition_order > FLAC__SUBSET_MAX_RICE_PARTITION_ORDER)
@@ -746,7 +746,7 @@
 #if FLAC__HAS_OGG
 	/* reorder metadata if necessary to ensure that any VORBIS_COMMENT is the first, according to the mapping spec */
 	if(is_ogg && 0 != encoder->protected_->metadata && encoder->protected_->num_metadata_blocks > 1) {
-		unsigned i1;
+		uint32_t i1;
 		for(i1 = 1; i1 < encoder->protected_->num_metadata_blocks; i1++) {
 			if(0 != encoder->protected_->metadata[i1] && encoder->protected_->metadata[i1]->type == FLAC__METADATA_TYPE_VORBIS_COMMENT) {
 				FLAC__StreamMetadata *vc = encoder->protected_->metadata[i1];
@@ -760,7 +760,7 @@
 #endif
 	/* keep track of any SEEKTABLE block */
 	if(0 != encoder->protected_->metadata && encoder->protected_->num_metadata_blocks > 0) {
-		unsigned i2;
+		uint32_t i2;
 		for(i2 = 0; i2 < encoder->protected_->num_metadata_blocks; i2++) {
 			if(0 != encoder->protected_->metadata[i2] && encoder->protected_->metadata[i2]->type == FLAC__METADATA_TYPE_SEEKTABLE) {
 				encoder->private_->seek_table = &encoder->protected_->metadata[i2]->data.seek_table;
@@ -835,6 +835,7 @@
 		encoder->private_->real_signal_mid_side_unaligned[i] = encoder->private_->real_signal_mid_side[i] = 0;
 #endif
 	}
+	encoder->private_->integer_signal_33bit_side_unaligned = encoder->private_->integer_signal_33bit_side = 0;
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
 	for(i = 0; i < encoder->protected_->num_apodizations; i++)
 		encoder->private_->window_unaligned[i] = encoder->private_->window[i] = 0;
@@ -853,15 +854,15 @@
 	encoder->private_->abs_residual_partition_sums_unaligned = encoder->private_->abs_residual_partition_sums = 0;
 	encoder->private_->raw_bits_per_partition_unaligned = encoder->private_->raw_bits_per_partition = 0;
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
-	encoder->private_->loose_mid_side_stereo_frames = (unsigned)((FLAC__double)encoder->protected_->sample_rate * 0.4 / (FLAC__double)encoder->protected_->blocksize + 0.5);
+	encoder->private_->loose_mid_side_stereo_frames = (uint32_t)((double)encoder->protected_->sample_rate * 0.4 / (double)encoder->protected_->blocksize + 0.5);
 #else
 	/* 26214 is the approximate fixed-point equivalent to 0.4 (0.4 * 2^16) */
-	/* sample rate can be up to 655350 Hz, and thus use 20 bits, so we do the multiply&divide by hand */
-	FLAC__ASSERT(FLAC__MAX_SAMPLE_RATE <= 655350);
+	/* sample rate can be up to 1048575 Hz, and thus use 20 bits, so we do the multiply&divide by hand */
+	FLAC__ASSERT(FLAC__MAX_SAMPLE_RATE <= 1048575);
 	FLAC__ASSERT(FLAC__MAX_BLOCK_SIZE <= 65535);
-	FLAC__ASSERT(encoder->protected_->sample_rate <= 655350);
+	FLAC__ASSERT(encoder->protected_->sample_rate <= 1048575);
 	FLAC__ASSERT(encoder->protected_->blocksize <= 65535);
-	encoder->private_->loose_mid_side_stereo_frames = (unsigned)FLAC__fixedpoint_trunc((((FLAC__uint64)(encoder->protected_->sample_rate) * (FLAC__uint64)(26214)) << 16) / (encoder->protected_->blocksize<<16) + FLAC__FP_ONE_HALF);
+	encoder->private_->loose_mid_side_stereo_frames = (uint32_t)FLAC__fixedpoint_trunc((((FLAC__uint64)(encoder->protected_->sample_rate) * (FLAC__uint64)(26214)) << 16) / (encoder->protected_->blocksize<<16) + FLAC__FP_ONE_HALF);
 #endif
 	if(encoder->private_->loose_mid_side_stereo_frames == 0)
 		encoder->private_->loose_mid_side_stereo_frames = 1;
@@ -869,14 +870,24 @@
 	encoder->private_->current_sample_number = 0;
 	encoder->private_->current_frame_number = 0;
 
-	encoder->private_->use_wide_by_block = (encoder->protected_->bits_per_sample + FLAC__bitmath_ilog2(encoder->protected_->blocksize)+1 > 30);
-	encoder->private_->use_wide_by_order = (encoder->protected_->bits_per_sample + FLAC__bitmath_ilog2(flac_max(encoder->protected_->max_lpc_order, FLAC__MAX_FIXED_ORDER))+1 > 30); /*@@@ need to use this? */
-	encoder->private_->use_wide_by_partition = (false); /*@@@ need to set this */
-
 	/*
 	 * get the CPU info and set the function pointers
 	 */
 	FLAC__cpu_info(&encoder->private_->cpuinfo);
+	/* remove cpu info as requested by
+	 * FLAC__stream_encoder_disable_instruction_set */
+	if(encoder->private_->disable_mmx)
+		encoder->private_->cpuinfo.x86.mmx = false;
+	if(encoder->private_->disable_sse2)
+		encoder->private_->cpuinfo.x86.sse2 = false;
+	if(encoder->private_->disable_ssse3)
+		encoder->private_->cpuinfo.x86.ssse3 = false;
+	if(encoder->private_->disable_sse41)
+		encoder->private_->cpuinfo.x86.sse41 = false;
+	if(encoder->private_->disable_avx2)
+		encoder->private_->cpuinfo.x86.avx2 = false;
+	if(encoder->private_->disable_fma)
+		encoder->private_->cpuinfo.x86.fma = false;
 	/* first default to the non-asm routines */
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
 	encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation;
@@ -892,68 +903,74 @@
 	/* now override with asm where appropriate */
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
 # ifndef FLAC__NO_ASM
+#if defined(FLAC__CPU_PPC64) && defined(FLAC__USE_VSX)
+#ifdef FLAC__HAS_TARGET_POWER8
+#ifdef FLAC__HAS_TARGET_POWER9
+	if (encoder->private_->cpuinfo.ppc.arch_3_00) {
+		if(encoder->protected_->max_lpc_order < 8)
+			encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_8;
+		else if(encoder->protected_->max_lpc_order < 10)
+			encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_10;
+		else if(encoder->protected_->max_lpc_order < 14)
+			encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_power9_vsx_lag_14;
+		else
+			encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation;
+	} else
+#endif
+	if (encoder->private_->cpuinfo.ppc.arch_2_07) {
+		if(encoder->protected_->max_lpc_order < 8)
+			encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_8;
+		else if(encoder->protected_->max_lpc_order < 10)
+			encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_10;
+		else if(encoder->protected_->max_lpc_order < 14)
+			encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_power8_vsx_lag_14;
+		else
+			encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation;
+	}
+#endif
+#endif /* defined(FLAC__CPU_PPC64) && defined(FLAC__USE_VSX) */
+
+#if defined FLAC__CPU_ARM64 && FLAC__HAS_NEONINTRIN
+#if FLAC__HAS_A64NEONINTRIN
+	if(encoder->protected_->max_lpc_order < 8)
+		encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_neon_lag_8;
+	else if(encoder->protected_->max_lpc_order < 10)
+		encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_neon_lag_10;
+	else if(encoder->protected_->max_lpc_order < 14)
+		encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_neon_lag_14;
+	else
+		encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation;
+#endif
+    encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_neon;
+    encoder->private_->local_lpc_compute_residual_from_qlp_coefficients       = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_neon;
+    encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_64bit = FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_neon;
+#endif /* defined FLAC__CPU_ARM64 && FLAC__HAS_NEONINTRIN */
+
 	if(encoder->private_->cpuinfo.use_asm) {
 #  ifdef FLAC__CPU_IA32
 		FLAC__ASSERT(encoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_IA32);
-#   ifdef FLAC__HAS_NASM
-		if(encoder->private_->cpuinfo.ia32.sse) {
-			if(encoder->protected_->max_lpc_order < 4)
-				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_4;
-			else if(encoder->protected_->max_lpc_order < 8)
-				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_8;
-			else if(encoder->protected_->max_lpc_order < 12)
-				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_12;
-			else if(encoder->protected_->max_lpc_order < 16)
-				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32_sse_lag_16;
-			else
-				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32;
-		}
-		else
-			encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_asm_ia32;
-
-		encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_64bit = FLAC__lpc_compute_residual_from_qlp_coefficients_wide_asm_ia32; /* OPT_IA32: was really necessary for GCC < 4.9 */
-		if(encoder->private_->cpuinfo.ia32.mmx) {
-			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32;
-			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx;
-		}
-		else {
-			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients = FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32;
-			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32;
-		}
-
-		if(encoder->private_->cpuinfo.ia32.mmx && encoder->private_->cpuinfo.ia32.cmov)
-			encoder->private_->local_fixed_compute_best_predictor = FLAC__fixed_compute_best_predictor_asm_ia32_mmx_cmov;
-#   endif /* FLAC__HAS_NASM */
-#   ifdef FLAC__HAS_X86INTRIN
-#    if defined FLAC__SSE_SUPPORTED
-		if(encoder->private_->cpuinfo.ia32.sse) {
-			if(encoder->protected_->max_lpc_order < 4)
-				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4;
-			else if(encoder->protected_->max_lpc_order < 8)
-				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8;
-			else if(encoder->protected_->max_lpc_order < 12)
-				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12;
-			else if(encoder->protected_->max_lpc_order < 16)
-				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16;
-			else
-				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation;
-		}
-#    endif
-
+#   if FLAC__HAS_X86INTRIN
 #    ifdef FLAC__SSE2_SUPPORTED
-		if(encoder->private_->cpuinfo.ia32.sse2) {
+		if (encoder->private_->cpuinfo.x86.sse2) {
+			if(encoder->protected_->max_lpc_order < 8)
+				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse2_lag_8;
+			else if(encoder->protected_->max_lpc_order < 10)
+				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse2_lag_10;
+			else if(encoder->protected_->max_lpc_order < 14)
+				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse2_lag_14;
+
 			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients       = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse2;
 			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2;
 		}
 #    endif
 #    ifdef FLAC__SSE4_1_SUPPORTED
-		if(encoder->private_->cpuinfo.ia32.sse41) {
+		if (encoder->private_->cpuinfo.x86.sse41) {
 			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients       = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_sse41;
 			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_64bit = FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_sse41;
 		}
 #    endif
 #    ifdef FLAC__AVX2_SUPPORTED
-		if(encoder->private_->cpuinfo.ia32.avx2) {
+		if (encoder->private_->cpuinfo.x86.avx2) {
 			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_avx2;
 			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients       = FLAC__lpc_compute_residual_from_qlp_coefficients_intrin_avx2;
 			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_64bit = FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2;
@@ -961,13 +978,13 @@
 #    endif
 
 #    ifdef FLAC__SSE2_SUPPORTED
-		if (encoder->private_->cpuinfo.ia32.sse2) {
+		if (encoder->private_->cpuinfo.x86.sse2) {
 			encoder->private_->local_fixed_compute_best_predictor      = FLAC__fixed_compute_best_predictor_intrin_sse2;
 			encoder->private_->local_fixed_compute_best_predictor_wide = FLAC__fixed_compute_best_predictor_wide_intrin_sse2;
 		}
 #    endif
 #    ifdef FLAC__SSSE3_SUPPORTED
-		if (encoder->private_->cpuinfo.ia32.ssse3) {
+		if (encoder->private_->cpuinfo.x86.ssse3) {
 			encoder->private_->local_fixed_compute_best_predictor      = FLAC__fixed_compute_best_predictor_intrin_ssse3;
 			encoder->private_->local_fixed_compute_best_predictor_wide = FLAC__fixed_compute_best_predictor_wide_intrin_ssse3;
 		}
@@ -975,20 +992,18 @@
 #   endif /* FLAC__HAS_X86INTRIN */
 #  elif defined FLAC__CPU_X86_64
 		FLAC__ASSERT(encoder->private_->cpuinfo.type == FLAC__CPUINFO_TYPE_X86_64);
-#   ifdef FLAC__HAS_X86INTRIN
-#    ifdef FLAC__SSE_SUPPORTED
-		if(encoder->protected_->max_lpc_order < 4)
-			encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_4;
-		else if(encoder->protected_->max_lpc_order < 8)
-			encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_8;
-		else if(encoder->protected_->max_lpc_order < 12)
-			encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_12;
-		else if(encoder->protected_->max_lpc_order < 16)
-			encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse_lag_16;
-#    endif
-
+#   if FLAC__HAS_X86INTRIN
 #    ifdef FLAC__SSE2_SUPPORTED
-		encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2;
+		if(encoder->private_->cpuinfo.x86.sse2) { /* For fuzzing */
+			if(encoder->protected_->max_lpc_order < 8)
+				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse2_lag_8;
+			else if(encoder->protected_->max_lpc_order < 10)
+				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse2_lag_10;
+			else if(encoder->protected_->max_lpc_order < 14)
+				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_sse2_lag_14;
+
+			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit = FLAC__lpc_compute_residual_from_qlp_coefficients_16_intrin_sse2;
+		}
 #    endif
 #    ifdef FLAC__SSE4_1_SUPPORTED
 		if(encoder->private_->cpuinfo.x86.sse41) {
@@ -1002,10 +1017,23 @@
 			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_64bit = FLAC__lpc_compute_residual_from_qlp_coefficients_wide_intrin_avx2;
 		}
 #    endif
+#    ifdef FLAC__FMA_SUPPORTED
+		if(encoder->private_->cpuinfo.x86.fma) {
+			if(encoder->protected_->max_lpc_order < 8)
+				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_fma_lag_8;
+			else if(encoder->protected_->max_lpc_order < 12)
+				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_fma_lag_12;
+			else if(encoder->protected_->max_lpc_order < 16)
+				encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation_intrin_fma_lag_16;
+		}
+#    endif
+
 
 #    ifdef FLAC__SSE2_SUPPORTED
-		encoder->private_->local_fixed_compute_best_predictor      = FLAC__fixed_compute_best_predictor_intrin_sse2;
-		encoder->private_->local_fixed_compute_best_predictor_wide = FLAC__fixed_compute_best_predictor_wide_intrin_sse2;
+		if(encoder->private_->cpuinfo.x86.sse2) { /* For fuzzing */
+			encoder->private_->local_fixed_compute_best_predictor      = FLAC__fixed_compute_best_predictor_intrin_sse2;
+			encoder->private_->local_fixed_compute_best_predictor_wide = FLAC__fixed_compute_best_predictor_wide_intrin_sse2;
+		}
 #    endif
 #    ifdef FLAC__SSSE3_SUPPORTED
 		if (encoder->private_->cpuinfo.x86.ssse3) {
@@ -1017,20 +1045,21 @@
 #  endif /* FLAC__CPU_... */
 	}
 # endif /* !FLAC__NO_ASM */
+
 #endif /* !FLAC__INTEGER_ONLY_LIBRARY */
-#if !defined FLAC__NO_ASM && defined FLAC__HAS_X86INTRIN
+#if !defined FLAC__NO_ASM && FLAC__HAS_X86INTRIN
 	if(encoder->private_->cpuinfo.use_asm) {
 # if defined FLAC__CPU_IA32
 #  ifdef FLAC__SSE2_SUPPORTED
-		if(encoder->private_->cpuinfo.ia32.sse2)
+		if (encoder->private_->cpuinfo.x86.sse2)
 			encoder->private_->local_precompute_partition_info_sums = FLAC__precompute_partition_info_sums_intrin_sse2;
 #  endif
 #  ifdef FLAC__SSSE3_SUPPORTED
-		if(encoder->private_->cpuinfo.ia32.ssse3)
+		if (encoder->private_->cpuinfo.x86.ssse3)
 			encoder->private_->local_precompute_partition_info_sums = FLAC__precompute_partition_info_sums_intrin_ssse3;
 #  endif
 #  ifdef FLAC__AVX2_SUPPORTED
-		if(encoder->private_->cpuinfo.ia32.avx2)
+		if (encoder->private_->cpuinfo.x86.avx2)
 			encoder->private_->local_precompute_partition_info_sums = FLAC__precompute_partition_info_sums_intrin_avx2;
 #  endif
 # elif defined FLAC__CPU_X86_64
@@ -1048,10 +1077,6 @@
 # endif /* FLAC__CPU_... */
 	}
 #endif /* !FLAC__NO_ASM && FLAC__HAS_X86INTRIN */
-	/* finally override based on wide-ness if necessary */
-	if(encoder->private_->use_wide_by_block) {
-		encoder->private_->local_fixed_compute_best_predictor = encoder->private_->local_fixed_compute_best_predictor_wide;
-	}
 
 	/* set state to OK; from here on, errors are fatal and we'll override the state then */
 	encoder->protected_->state = FLAC__STREAM_ENCODER_OK;
@@ -1315,7 +1340,8 @@
 	 * Windows can suffer quite badly from disk fragmentation. This can be
 	 * reduced significantly by setting the output buffer size to be 10MB.
 	 */
-	setvbuf(file, NULL, _IOFBF, 10*1024*1024);
+	if(GetFileType((HANDLE)_get_osfhandle(_fileno(file))) == FILE_TYPE_DISK)
+		setvbuf(file, NULL, _IOFBF, 10*1024*1024);
 #endif
 	encoder->private_->file = file;
 
@@ -1340,10 +1366,10 @@
 	}
 
 	{
-		unsigned blocksize = FLAC__stream_encoder_get_blocksize(encoder);
+		uint32_t blocksize = FLAC__stream_encoder_get_blocksize(encoder);
 
 		FLAC__ASSERT(blocksize != 0);
-		encoder->private_->total_frames_estimate = (unsigned)((FLAC__stream_encoder_get_total_samples_estimate(encoder) + blocksize - 1) / blocksize);
+		encoder->private_->total_frames_estimate = (uint32_t)((FLAC__stream_encoder_get_total_samples_estimate(encoder) + blocksize - 1) / blocksize);
 	}
 
 	return init_status;
@@ -1423,18 +1449,34 @@
 {
 	FLAC__bool error = false;
 
-	FLAC__ASSERT(0 != encoder);
+	if (encoder == NULL)
+		return false;
+
 	FLAC__ASSERT(0 != encoder->private_);
 	FLAC__ASSERT(0 != encoder->protected_);
 
-	if(encoder->protected_->state == FLAC__STREAM_ENCODER_UNINITIALIZED)
+	if(encoder->protected_->state == FLAC__STREAM_ENCODER_UNINITIALIZED){
+		if(encoder->protected_->metadata){ // True in case FLAC__stream_encoder_set_metadata was used but init failed
+			free(encoder->protected_->metadata);
+			encoder->protected_->metadata = 0;
+			encoder->protected_->num_metadata_blocks = 0;
+		}
+		if(0 != encoder->private_->file) {
+			if(encoder->private_->file != stdout)
+				fclose(encoder->private_->file);
+			encoder->private_->file = 0;
+		}
 		return true;
+	}
 
 	if(encoder->protected_->state == FLAC__STREAM_ENCODER_OK && !encoder->private_->is_being_deleted) {
 		if(encoder->private_->current_sample_number != 0) {
-			const FLAC__bool is_fractional_block = encoder->protected_->blocksize != encoder->private_->current_sample_number;
 			encoder->protected_->blocksize = encoder->private_->current_sample_number;
-			if(!process_frame_(encoder, is_fractional_block, /*is_last_block=*/true))
+			if(!resize_buffers_(encoder, encoder->protected_->blocksize)) {
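+				/* the above function sets the state for us in case of an error. NOTE(review): every other return visible in this function is true/false, so returning an INIT_STATUS constant on the next line looks unintended -- if that constant is non-zero this failure is reported to the caller as success; confirm */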
+				return FLAC__STREAM_ENCODER_INIT_STATUS_ENCODER_ERROR;
+			}
+			if(!process_frame_(encoder, /*is_last_block=*/true))
 				error = true;
 		}
 	}
@@ -1539,7 +1581,7 @@
 	return true;
 }
 
-FLAC_API FLAC__bool FLAC__stream_encoder_set_channels(FLAC__StreamEncoder *encoder, unsigned value)
+FLAC_API FLAC__bool FLAC__stream_encoder_set_channels(FLAC__StreamEncoder *encoder, uint32_t value)
 {
 	FLAC__ASSERT(0 != encoder);
 	FLAC__ASSERT(0 != encoder->private_);
@@ -1550,7 +1592,7 @@
 	return true;
 }
 
-FLAC_API FLAC__bool FLAC__stream_encoder_set_bits_per_sample(FLAC__StreamEncoder *encoder, unsigned value)
+FLAC_API FLAC__bool FLAC__stream_encoder_set_bits_per_sample(FLAC__StreamEncoder *encoder, uint32_t value)
 {
 	FLAC__ASSERT(0 != encoder);
 	FLAC__ASSERT(0 != encoder->private_);
@@ -1561,7 +1603,7 @@
 	return true;
 }
 
-FLAC_API FLAC__bool FLAC__stream_encoder_set_sample_rate(FLAC__StreamEncoder *encoder, unsigned value)
+FLAC_API FLAC__bool FLAC__stream_encoder_set_sample_rate(FLAC__StreamEncoder *encoder, uint32_t value)
 {
 	FLAC__ASSERT(0 != encoder);
 	FLAC__ASSERT(0 != encoder->private_);
@@ -1572,7 +1614,7 @@
 	return true;
 }
 
-FLAC_API FLAC__bool FLAC__stream_encoder_set_compression_level(FLAC__StreamEncoder *encoder, unsigned value)
+FLAC_API FLAC__bool FLAC__stream_encoder_set_compression_level(FLAC__StreamEncoder *encoder, uint32_t value)
 {
 	FLAC__bool ok = true;
 	FLAC__ASSERT(0 != encoder);
@@ -1605,7 +1647,7 @@
 	return ok;
 }
 
-FLAC_API FLAC__bool FLAC__stream_encoder_set_blocksize(FLAC__StreamEncoder *encoder, unsigned value)
+FLAC_API FLAC__bool FLAC__stream_encoder_set_blocksize(FLAC__StreamEncoder *encoder, uint32_t value)
 {
 	FLAC__ASSERT(0 != encoder);
 	FLAC__ASSERT(0 != encoder->private_);
@@ -1692,7 +1734,7 @@
 				encoder->protected_->apodizations[encoder->protected_->num_apodizations++].type = FLAC__APODIZATION_TUKEY;
 			}
 		}
-		else if(n>15   && 0 == strncmp("partial_tukey("       , specification, 14)) {
+		else if(n>15   && 0 == strncmp("partial_tukey(", specification, 14)) {
 			FLAC__int32 tukey_parts = (FLAC__int32)strtod(specification+14, 0);
 			const char *si_1 = strchr(specification, '/');
 			FLAC__real overlap = si_1?flac_min((FLAC__real)strtod(si_1+1, 0),0.99f):0.1f;
@@ -1713,7 +1755,7 @@
 				}
 			}
 		}
-		else if(n>16   && 0 == strncmp("punchout_tukey("       , specification, 15)) {
+		else if(n>16   && 0 == strncmp("punchout_tukey(", specification, 15)) {
 			FLAC__int32 tukey_parts = (FLAC__int32)strtod(specification+15, 0);
 			const char *si_1 = strchr(specification, '/');
 			FLAC__real overlap = si_1?flac_min((FLAC__real)strtod(si_1+1, 0),0.99f):0.2f;
@@ -1734,6 +1776,20 @@
 				}
 			}
 		}
+		else if(n>17  && 0 == strncmp("subdivide_tukey(", specification, 16)){
+			FLAC__int32 parts = (FLAC__int32)strtod(specification+16, 0);
+			if(parts > 1){
+				const char *si_1 = strchr(specification, '/');
+				FLAC__real p = si_1?(FLAC__real)strtod(si_1+1, 0):5e-1;
+				if(p > 1)
+					p = 1;
+				else if(p < 0)
+					p = 0;
+				encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.subdivide_tukey.parts = parts;
+				encoder->protected_->apodizations[encoder->protected_->num_apodizations].parameters.subdivide_tukey.p = p/parts;
+				encoder->protected_->apodizations[encoder->protected_->num_apodizations++].type = FLAC__APODIZATION_SUBDIVIDE_TUKEY;
+			}
+		}
 		else if(n==5  && 0 == strncmp("welch"        , specification, n))
 			encoder->protected_->apodizations[encoder->protected_->num_apodizations++].type = FLAC__APODIZATION_WELCH;
 		if (encoder->protected_->num_apodizations == 32)
@@ -1752,7 +1808,7 @@
 	return true;
 }
 
-FLAC_API FLAC__bool FLAC__stream_encoder_set_max_lpc_order(FLAC__StreamEncoder *encoder, unsigned value)
+FLAC_API FLAC__bool FLAC__stream_encoder_set_max_lpc_order(FLAC__StreamEncoder *encoder, uint32_t value)
 {
 	FLAC__ASSERT(0 != encoder);
 	FLAC__ASSERT(0 != encoder->private_);
@@ -1763,7 +1819,7 @@
 	return true;
 }
 
-FLAC_API FLAC__bool FLAC__stream_encoder_set_qlp_coeff_precision(FLAC__StreamEncoder *encoder, unsigned value)
+FLAC_API FLAC__bool FLAC__stream_encoder_set_qlp_coeff_precision(FLAC__StreamEncoder *encoder, uint32_t value)
 {
 	FLAC__ASSERT(0 != encoder);
 	FLAC__ASSERT(0 != encoder->private_);
@@ -1792,8 +1848,10 @@
 	FLAC__ASSERT(0 != encoder->protected_);
 	if(encoder->protected_->state != FLAC__STREAM_ENCODER_UNINITIALIZED)
 		return false;
-#if 0
-	/*@@@ deprecated: */
+#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
+	/* was deprecated since FLAC 1.0.4 (24-Sep-2002), but is needed for
+	 * full spec coverage, so this should be reenabled at some point.
+	 * For now only enable while fuzzing */
 	encoder->protected_->do_escape_coding = value;
 #else
 	(void)value;
@@ -1812,7 +1870,7 @@
 	return true;
 }
 
-FLAC_API FLAC__bool FLAC__stream_encoder_set_min_residual_partition_order(FLAC__StreamEncoder *encoder, unsigned value)
+FLAC_API FLAC__bool FLAC__stream_encoder_set_min_residual_partition_order(FLAC__StreamEncoder *encoder, uint32_t value)
 {
 	FLAC__ASSERT(0 != encoder);
 	FLAC__ASSERT(0 != encoder->private_);
@@ -1823,7 +1881,7 @@
 	return true;
 }
 
-FLAC_API FLAC__bool FLAC__stream_encoder_set_max_residual_partition_order(FLAC__StreamEncoder *encoder, unsigned value)
+FLAC_API FLAC__bool FLAC__stream_encoder_set_max_residual_partition_order(FLAC__StreamEncoder *encoder, uint32_t value)
 {
 	FLAC__ASSERT(0 != encoder);
 	FLAC__ASSERT(0 != encoder->private_);
@@ -1834,7 +1892,7 @@
 	return true;
 }
 
-FLAC_API FLAC__bool FLAC__stream_encoder_set_rice_parameter_search_dist(FLAC__StreamEncoder *encoder, unsigned value)
+FLAC_API FLAC__bool FLAC__stream_encoder_set_rice_parameter_search_dist(FLAC__StreamEncoder *encoder, uint32_t value)
 {
 	FLAC__ASSERT(0 != encoder);
 	FLAC__ASSERT(0 != encoder->private_);
@@ -1857,11 +1915,12 @@
 	FLAC__ASSERT(0 != encoder->protected_);
 	if(encoder->protected_->state != FLAC__STREAM_ENCODER_UNINITIALIZED)
 		return false;
+	value = flac_min(value, (FLAC__U64L(1) << FLAC__STREAM_METADATA_STREAMINFO_TOTAL_SAMPLES_LEN) - 1);
 	encoder->protected_->total_samples_estimate = value;
 	return true;
 }
 
-FLAC_API FLAC__bool FLAC__stream_encoder_set_metadata(FLAC__StreamEncoder *encoder, FLAC__StreamMetadata **metadata, unsigned num_blocks)
+FLAC_API FLAC__bool FLAC__stream_encoder_set_metadata(FLAC__StreamEncoder *encoder, FLAC__StreamMetadata **metadata, uint32_t num_blocks)
 {
 	FLAC__ASSERT(0 != encoder);
 	FLAC__ASSERT(0 != encoder->private_);
@@ -1893,10 +1952,37 @@
 	return true;
 }
 
+FLAC_API FLAC__bool FLAC__stream_encoder_set_limit_min_bitrate(FLAC__StreamEncoder *encoder, FLAC__bool value)
+{ /* Records `value` as the encoder's limit_min_bitrate setting; returns true on success, false (nothing changed) once the encoder is no longer UNINITIALIZED. */
+	FLAC__ASSERT(0 != encoder);
+	FLAC__ASSERT(0 != encoder->private_);
+	FLAC__ASSERT(0 != encoder->protected_);
+	if(encoder->protected_->state != FLAC__STREAM_ENCODER_UNINITIALIZED)
+		return false; /* settings are only accepted before the encoder has been initialized */
+	encoder->protected_->limit_min_bitrate = value;
+	return true;
+}
+
 /*
- * These three functions are not static, but not publically exposed in
- * include/FLAC/ either.  They are used by the test suite.
+ * These four functions are not static, but not publicly exposed in
+ * include/FLAC/ either.  They are used by the test suite and in fuzzing.
  */
+FLAC_API FLAC__bool FLAC__stream_encoder_disable_instruction_set(FLAC__StreamEncoder *encoder, FLAC__bool value)
+{ /* Splits `value`, used here as a bitmask, into the per-instruction-set disable_* flags; returns true on success, false (nothing changed) once the encoder is no longer UNINITIALIZED. */
+	FLAC__ASSERT(0 != encoder);
+	FLAC__ASSERT(0 != encoder->private_);
+	FLAC__ASSERT(0 != encoder->protected_);
+	if(encoder->protected_->state != FLAC__STREAM_ENCODER_UNINITIALIZED)
+		return false; /* init reads these flags to clear the matching cpuinfo.x86 fields, so they must be set beforehand */
+	encoder->private_->disable_mmx = value & 1; /* bit 0 (0x01): MMX */
+	encoder->private_->disable_sse2 = value & 2; /* bit 1 (0x02): SSE2 */
+	encoder->private_->disable_ssse3 = value & 4; /* bit 2 (0x04): SSSE3 */
+	encoder->private_->disable_sse41 = value & 8; /* bit 3 (0x08): SSE4.1 */
+	encoder->private_->disable_avx2 = value & 16; /* bit 4 (0x10): AVX2 */
+	encoder->private_->disable_fma = value & 32; /* bit 5 (0x20): FMA */
+	return true; /* NOTE(review): the masked bits are stored as-is (presumably 2, 4, ... rather than 1); the init code only tests the flags for non-zero -- confirm FLAC__bool is wide enough to carry a bitmask */
+}
+
 FLAC_API FLAC__bool FLAC__stream_encoder_disable_constant_subframes(FLAC__StreamEncoder *encoder, FLAC__bool value)
 {
 	FLAC__ASSERT(0 != encoder);
@@ -1960,7 +2046,7 @@
 		return FLAC__stream_decoder_get_resolved_state_string(encoder->private_->verify.decoder);
 }
 
-FLAC_API void FLAC__stream_encoder_get_verify_decoder_error_stats(const FLAC__StreamEncoder *encoder, FLAC__uint64 *absolute_sample, unsigned *frame_number, unsigned *channel, unsigned *sample, FLAC__int32 *expected, FLAC__int32 *got)
+FLAC_API void FLAC__stream_encoder_get_verify_decoder_error_stats(const FLAC__StreamEncoder *encoder, FLAC__uint64 *absolute_sample, uint32_t *frame_number, uint32_t *channel, uint32_t *sample, FLAC__int32 *expected, FLAC__int32 *got)
 {
 	FLAC__ASSERT(0 != encoder);
 	FLAC__ASSERT(0 != encoder->private_);
@@ -2003,7 +2089,7 @@
 	return encoder->protected_->do_md5;
 }
 
-FLAC_API unsigned FLAC__stream_encoder_get_channels(const FLAC__StreamEncoder *encoder)
+FLAC_API uint32_t FLAC__stream_encoder_get_channels(const FLAC__StreamEncoder *encoder)
 {
 	FLAC__ASSERT(0 != encoder);
 	FLAC__ASSERT(0 != encoder->private_);
@@ -2011,7 +2097,7 @@
 	return encoder->protected_->channels;
 }
 
-FLAC_API unsigned FLAC__stream_encoder_get_bits_per_sample(const FLAC__StreamEncoder *encoder)
+FLAC_API uint32_t FLAC__stream_encoder_get_bits_per_sample(const FLAC__StreamEncoder *encoder)
 {
 	FLAC__ASSERT(0 != encoder);
 	FLAC__ASSERT(0 != encoder->private_);
@@ -2019,7 +2105,7 @@
 	return encoder->protected_->bits_per_sample;
 }
 
-FLAC_API unsigned FLAC__stream_encoder_get_sample_rate(const FLAC__StreamEncoder *encoder)
+FLAC_API uint32_t FLAC__stream_encoder_get_sample_rate(const FLAC__StreamEncoder *encoder)
 {
 	FLAC__ASSERT(0 != encoder);
 	FLAC__ASSERT(0 != encoder->private_);
@@ -2027,7 +2113,7 @@
 	return encoder->protected_->sample_rate;
 }
 
-FLAC_API unsigned FLAC__stream_encoder_get_blocksize(const FLAC__StreamEncoder *encoder)
+FLAC_API uint32_t FLAC__stream_encoder_get_blocksize(const FLAC__StreamEncoder *encoder)
 {
 	FLAC__ASSERT(0 != encoder);
 	FLAC__ASSERT(0 != encoder->private_);
@@ -2051,7 +2137,7 @@
 	return encoder->protected_->loose_mid_side_stereo;
 }
 
-FLAC_API unsigned FLAC__stream_encoder_get_max_lpc_order(const FLAC__StreamEncoder *encoder)
+FLAC_API uint32_t FLAC__stream_encoder_get_max_lpc_order(const FLAC__StreamEncoder *encoder)
 {
 	FLAC__ASSERT(0 != encoder);
 	FLAC__ASSERT(0 != encoder->private_);
@@ -2059,7 +2145,7 @@
 	return encoder->protected_->max_lpc_order;
 }
 
-FLAC_API unsigned FLAC__stream_encoder_get_qlp_coeff_precision(const FLAC__StreamEncoder *encoder)
+FLAC_API uint32_t FLAC__stream_encoder_get_qlp_coeff_precision(const FLAC__StreamEncoder *encoder)
 {
 	FLAC__ASSERT(0 != encoder);
 	FLAC__ASSERT(0 != encoder->private_);
@@ -2091,7 +2177,7 @@
 	return encoder->protected_->do_exhaustive_model_search;
 }
 
-FLAC_API unsigned FLAC__stream_encoder_get_min_residual_partition_order(const FLAC__StreamEncoder *encoder)
+FLAC_API uint32_t FLAC__stream_encoder_get_min_residual_partition_order(const FLAC__StreamEncoder *encoder)
 {
 	FLAC__ASSERT(0 != encoder);
 	FLAC__ASSERT(0 != encoder->private_);
@@ -2099,7 +2185,7 @@
 	return encoder->protected_->min_residual_partition_order;
 }
 
-FLAC_API unsigned FLAC__stream_encoder_get_max_residual_partition_order(const FLAC__StreamEncoder *encoder)
+FLAC_API uint32_t FLAC__stream_encoder_get_max_residual_partition_order(const FLAC__StreamEncoder *encoder)
 {
 	FLAC__ASSERT(0 != encoder);
 	FLAC__ASSERT(0 != encoder->private_);
@@ -2107,7 +2193,7 @@
 	return encoder->protected_->max_residual_partition_order;
 }
 
-FLAC_API unsigned FLAC__stream_encoder_get_rice_parameter_search_dist(const FLAC__StreamEncoder *encoder)
+FLAC_API uint32_t FLAC__stream_encoder_get_rice_parameter_search_dist(const FLAC__StreamEncoder *encoder)
 {
 	FLAC__ASSERT(0 != encoder);
 	FLAC__ASSERT(0 != encoder->private_);
@@ -2123,32 +2209,60 @@
 	return encoder->protected_->total_samples_estimate;
 }
 
-FLAC_API FLAC__bool FLAC__stream_encoder_process(FLAC__StreamEncoder *encoder, const FLAC__int32 * const buffer[], unsigned samples)
+FLAC_API FLAC__bool FLAC__stream_encoder_get_limit_min_bitrate(const FLAC__StreamEncoder *encoder)
 {
-	unsigned i, j = 0, channel;
-	const unsigned channels = encoder->protected_->channels, blocksize = encoder->protected_->blocksize;
+	FLAC__ASSERT(0 != encoder);
+	FLAC__ASSERT(0 != encoder->private_);
+	FLAC__ASSERT(0 != encoder->protected_);
+	return encoder->protected_->limit_min_bitrate;
+}
+
+FLAC_API FLAC__bool FLAC__stream_encoder_process(FLAC__StreamEncoder *encoder, const FLAC__int32 * const buffer[], uint32_t samples)
+{
+	uint32_t i, j = 0, k = 0, channel;
+	const uint32_t channels = encoder->protected_->channels, blocksize = encoder->protected_->blocksize, bps = encoder->protected_->bits_per_sample;
+	const FLAC__int32 sample_max = INT32_MAX >> (32 - encoder->protected_->bits_per_sample);
+	const FLAC__int32 sample_min = INT32_MIN >> (32 - encoder->protected_->bits_per_sample);
 
 	FLAC__ASSERT(0 != encoder);
 	FLAC__ASSERT(0 != encoder->private_);
 	FLAC__ASSERT(0 != encoder->protected_);
-	FLAC__ASSERT(encoder->protected_->state == FLAC__STREAM_ENCODER_OK);
+
+	if(encoder->protected_->state != FLAC__STREAM_ENCODER_OK)
+		return false;
 
 	do {
-		const unsigned n = flac_min(blocksize+OVERREAD_-encoder->private_->current_sample_number, samples-j);
+		const uint32_t n = flac_min(blocksize+OVERREAD_-encoder->private_->current_sample_number, samples-j);
 
 		if(encoder->protected_->verify)
 			append_to_verify_fifo_(&encoder->private_->verify.input_fifo, buffer, j, channels, n);
 
-		for(channel = 0; channel < channels; channel++)
+		for(channel = 0; channel < channels; channel++) {
+			if (buffer[channel] == NULL) { /* reject a missing channel BEFORE the range scan below dereferences it */
+				return false;
+			}
+			for(i = encoder->private_->current_sample_number, k = j; i <= blocksize && k < samples; i++, k++) {
+				if(buffer[channel][k] < sample_min || buffer[channel][k] > sample_max){ /* sample outside the range derived from bits_per_sample */
+					encoder->protected_->state = FLAC__STREAM_ENCODER_CLIENT_ERROR;
+					return false;
+				}
+			}
 			memcpy(&encoder->private_->integer_signal[channel][encoder->private_->current_sample_number], &buffer[channel][j], sizeof(buffer[channel][0]) * n);
+		}
 
 		if(encoder->protected_->do_mid_side_stereo) {
 			FLAC__ASSERT(channels == 2);
 			/* "i <= blocksize" to overread 1 sample; see comment in OVERREAD_ decl */
-			for(i = encoder->private_->current_sample_number; i <= blocksize && j < samples; i++, j++) {
-				encoder->private_->integer_signal_mid_side[1][i] = buffer[0][j] - buffer[1][j];
-				encoder->private_->integer_signal_mid_side[0][i] = (buffer[0][j] + buffer[1][j]) >> 1; /* NOTE: not the same as 'mid = (buffer[0][j] + buffer[1][j]) / 2' ! */
-			}
+			if(bps < 32)
+				for(i = encoder->private_->current_sample_number; i <= blocksize && j < samples; i++, j++) {
+					encoder->private_->integer_signal_mid_side[1][i] = buffer[0][j] - buffer[1][j];
+					encoder->private_->integer_signal_mid_side[0][i] = (buffer[0][j] + buffer[1][j]) >> 1; /* NOTE: not the same as 'mid = (buffer[0][j] + buffer[1][j]) / 2' ! */
+				}
+			else
+				for(i = encoder->private_->current_sample_number; i <= blocksize && j < samples; i++, j++) {
+					encoder->private_->integer_signal_33bit_side[i] = (FLAC__int64)buffer[0][j] - (FLAC__int64)buffer[1][j];
+					encoder->private_->integer_signal_mid_side[0][i] = ((FLAC__int64)buffer[0][j] + (FLAC__int64)buffer[1][j]) >> 1; /* NOTE: not the same as 'mid = (buffer[0][j] + buffer[1][j]) / 2' ! */
+				}
 		}
 		else
 			j += n;
@@ -2159,14 +2273,17 @@
 		if(encoder->private_->current_sample_number > blocksize) {
 			FLAC__ASSERT(encoder->private_->current_sample_number == blocksize+OVERREAD_);
 			FLAC__ASSERT(OVERREAD_ == 1); /* assert we only overread 1 sample which simplifies the rest of the code below */
-			if(!process_frame_(encoder, /*is_fractional_block=*/false, /*is_last_block=*/false))
+			if(!process_frame_(encoder, /*is_last_block=*/false))
 				return false;
 			/* move unprocessed overread samples to beginnings of arrays */
 			for(channel = 0; channel < channels; channel++)
 				encoder->private_->integer_signal[channel][0] = encoder->private_->integer_signal[channel][blocksize];
 			if(encoder->protected_->do_mid_side_stereo) {
 				encoder->private_->integer_signal_mid_side[0][0] = encoder->private_->integer_signal_mid_side[0][blocksize];
-				encoder->private_->integer_signal_mid_side[1][0] = encoder->private_->integer_signal_mid_side[1][blocksize];
+				if(bps < 32)
+					encoder->private_->integer_signal_mid_side[1][0] = encoder->private_->integer_signal_mid_side[1][blocksize];
+				else
+					encoder->private_->integer_signal_33bit_side[0] = encoder->private_->integer_signal_33bit_side[blocksize];
 			}
 			encoder->private_->current_sample_number = 1;
 		}
@@ -2175,16 +2292,19 @@
 	return true;
 }
 
-FLAC_API FLAC__bool FLAC__stream_encoder_process_interleaved(FLAC__StreamEncoder *encoder, const FLAC__int32 buffer[], unsigned samples)
+FLAC_API FLAC__bool FLAC__stream_encoder_process_interleaved(FLAC__StreamEncoder *encoder, const FLAC__int32 buffer[], uint32_t samples)
 {
-	unsigned i, j, k, channel;
-	FLAC__int32 x, mid, side;
-	const unsigned channels = encoder->protected_->channels, blocksize = encoder->protected_->blocksize;
+	uint32_t i, j, k, channel;
+	const uint32_t channels = encoder->protected_->channels, blocksize = encoder->protected_->blocksize, bps = encoder->protected_->bits_per_sample;
+	const FLAC__int32 sample_max = INT32_MAX >> (32 - encoder->protected_->bits_per_sample);
+	const FLAC__int32 sample_min = INT32_MIN >> (32 - encoder->protected_->bits_per_sample);
 
 	FLAC__ASSERT(0 != encoder);
 	FLAC__ASSERT(0 != encoder->private_);
 	FLAC__ASSERT(0 != encoder->protected_);
-	FLAC__ASSERT(encoder->protected_->state == FLAC__STREAM_ENCODER_OK);
+
+	if(encoder->protected_->state != FLAC__STREAM_ENCODER_OK)
+		return false;
 
 	j = k = 0;
 	/*
@@ -2201,19 +2321,26 @@
 
 			/* "i <= blocksize" to overread 1 sample; see comment in OVERREAD_ decl */
 			for(i = encoder->private_->current_sample_number; i <= blocksize && j < samples; i++, j++) {
-				encoder->private_->integer_signal[0][i] = mid = side = buffer[k++];
-				x = buffer[k++];
-				encoder->private_->integer_signal[1][i] = x;
-				mid += x;
-				side -= x;
-				mid >>= 1; /* NOTE: not the same as 'mid = (left + right) / 2' ! */
-				encoder->private_->integer_signal_mid_side[1][i] = side;
-				encoder->private_->integer_signal_mid_side[0][i] = mid;
+				if(buffer[k]   < sample_min || buffer[k]   > sample_max ||
+				   buffer[k+1] < sample_min || buffer[k+1] > sample_max){
+					encoder->protected_->state = FLAC__STREAM_ENCODER_CLIENT_ERROR;
+					return false;
+				}
+				encoder->private_->integer_signal[0][i] = buffer[k++];
+				encoder->private_->integer_signal[1][i] = buffer[k++];
+				if(bps < 32){
+					encoder->private_->integer_signal_mid_side[1][i] = encoder->private_->integer_signal[0][i] - encoder->private_->integer_signal[1][i];
+					encoder->private_->integer_signal_mid_side[0][i] = (encoder->private_->integer_signal[0][i] + encoder->private_->integer_signal[1][i]) >> 1;
+				}
+				else {
+					encoder->private_->integer_signal_33bit_side[i] = (FLAC__int64)encoder->private_->integer_signal[0][i] - (FLAC__int64)encoder->private_->integer_signal[1][i];
+					encoder->private_->integer_signal_mid_side[0][i] = ((FLAC__int64)encoder->private_->integer_signal[0][i] + (FLAC__int64)encoder->private_->integer_signal[1][i]) >> 1;
+				}
 			}
 			encoder->private_->current_sample_number = i;
 			/* we only process if we have a full block + 1 extra sample; final block is always handled by FLAC__stream_encoder_finish() */
 			if(i > blocksize) {
-				if(!process_frame_(encoder, /*is_fractional_block=*/false, /*is_last_block=*/false))
+				if(!process_frame_(encoder, /*is_last_block=*/false))
 					return false;
 				/* move unprocessed overread samples to beginnings of arrays */
 				FLAC__ASSERT(i == blocksize+OVERREAD_);
@@ -2221,7 +2348,10 @@
 				encoder->private_->integer_signal[0][0] = encoder->private_->integer_signal[0][blocksize];
 				encoder->private_->integer_signal[1][0] = encoder->private_->integer_signal[1][blocksize];
 				encoder->private_->integer_signal_mid_side[0][0] = encoder->private_->integer_signal_mid_side[0][blocksize];
-				encoder->private_->integer_signal_mid_side[1][0] = encoder->private_->integer_signal_mid_side[1][blocksize];
+				if(bps < 32)
+					encoder->private_->integer_signal_mid_side[1][0] = encoder->private_->integer_signal_mid_side[1][blocksize];
+				else
+					encoder->private_->integer_signal_33bit_side[0] = encoder->private_->integer_signal_33bit_side[blocksize];
 				encoder->private_->current_sample_number = 1;
 			}
 		} while(j < samples);
@@ -2236,13 +2366,18 @@
 
 			/* "i <= blocksize" to overread 1 sample; see comment in OVERREAD_ decl */
 			for(i = encoder->private_->current_sample_number; i <= blocksize && j < samples; i++, j++) {
-				for(channel = 0; channel < channels; channel++)
+				for(channel = 0; channel < channels; channel++){
+					if(buffer[k] < sample_min || buffer[k] > sample_max){
+						encoder->protected_->state = FLAC__STREAM_ENCODER_CLIENT_ERROR;
+						return false;
+					}
 					encoder->private_->integer_signal[channel][i] = buffer[k++];
+				}
 			}
 			encoder->private_->current_sample_number = i;
 			/* we only process if we have a full block + 1 extra sample; final block is always handled by FLAC__stream_encoder_finish() */
 			if(i > blocksize) {
-				if(!process_frame_(encoder, /*is_fractional_block=*/false, /*is_last_block=*/false))
+				if(!process_frame_(encoder, /*is_last_block=*/false))
 					return false;
 				/* move unprocessed overread samples to beginnings of arrays */
 				FLAC__ASSERT(i == blocksize+OVERREAD_);
@@ -2294,16 +2429,20 @@
 	encoder->protected_->max_residual_partition_order = 0;
 	encoder->protected_->rice_parameter_search_dist = 0;
 	encoder->protected_->total_samples_estimate = 0;
+	encoder->protected_->limit_min_bitrate = false;
 	encoder->protected_->metadata = 0;
 	encoder->protected_->num_metadata_blocks = 0;
 
 	encoder->private_->seek_table = 0;
+	encoder->private_->disable_mmx = false;
+	encoder->private_->disable_sse2 = false;
+	encoder->private_->disable_ssse3 = false;
+	encoder->private_->disable_sse41 = false;
+	encoder->private_->disable_avx2 = false;
 	encoder->private_->disable_constant_subframes = false;
 	encoder->private_->disable_fixed_subframes = false;
 	encoder->private_->disable_verbatim_subframes = false;
-#if FLAC__HAS_OGG
 	encoder->private_->is_ogg = false;
-#endif
 	encoder->private_->read_callback = 0;
 	encoder->private_->write_callback = 0;
 	encoder->private_->seek_callback = 0;
@@ -2321,7 +2460,7 @@
 
 void free_(FLAC__StreamEncoder *encoder)
 {
-	unsigned i, channel;
+	uint32_t i, channel;
 
 	FLAC__ASSERT(0 != encoder);
 	if(encoder->protected_->metadata) {
@@ -2353,6 +2492,10 @@
 		}
 #endif
 	}
+	if(0 != encoder->private_->integer_signal_33bit_side_unaligned){
+		free(encoder->private_->integer_signal_33bit_side_unaligned);
+		encoder->private_->integer_signal_33bit_side_unaligned = 0;
+	}
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
 	for(i = 0; i < encoder->protected_->num_apodizations; i++) {
 		if(0 != encoder->private_->window_unaligned[i]) {
@@ -2400,76 +2543,96 @@
 	FLAC__bitwriter_free(encoder->private_->frame);
 }
 
-FLAC__bool resize_buffers_(FLAC__StreamEncoder *encoder, unsigned new_blocksize)
+FLAC__bool resize_buffers_(FLAC__StreamEncoder *encoder, uint32_t new_blocksize)
 {
 	FLAC__bool ok;
-	unsigned i, channel;
+	uint32_t i, channel;
 
 	FLAC__ASSERT(new_blocksize > 0);
 	FLAC__ASSERT(encoder->protected_->state == FLAC__STREAM_ENCODER_OK);
-	FLAC__ASSERT(encoder->private_->current_sample_number == 0);
-
-	/* To avoid excessive malloc'ing, we only grow the buffer; no shrinking. */
-	if(new_blocksize <= encoder->private_->input_capacity)
-		return true;
 
 	ok = true;
 
-	/* WATCHOUT: FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx() and ..._intrin_sse2()
-	 * require that the input arrays (in our case the integer signals)
-	 * have a buffer of up to 3 zeroes in front (at negative indices) for
-	 * alignment purposes; we use 4 in front to keep the data well-aligned.
-	 */
+	/* To avoid excessive malloc'ing, we only grow the buffer; no shrinking. */
+	if(new_blocksize > encoder->private_->input_capacity) {
 
-	for(i = 0; ok && i < encoder->protected_->channels; i++) {
-		ok = ok && FLAC__memory_alloc_aligned_int32_array(new_blocksize+4+OVERREAD_, &encoder->private_->integer_signal_unaligned[i], &encoder->private_->integer_signal[i]);
-		memset(encoder->private_->integer_signal[i], 0, sizeof(FLAC__int32)*4);
-		encoder->private_->integer_signal[i] += 4;
-#ifndef FLAC__INTEGER_ONLY_LIBRARY
-#if 0 /* @@@ currently unused */
-		if(encoder->protected_->max_lpc_order > 0)
-			ok = ok && FLAC__memory_alloc_aligned_real_array(new_blocksize+OVERREAD_, &encoder->private_->real_signal_unaligned[i], &encoder->private_->real_signal[i]);
-#endif
-#endif
-	}
-	for(i = 0; ok && i < 2; i++) {
-		ok = ok && FLAC__memory_alloc_aligned_int32_array(new_blocksize+4+OVERREAD_, &encoder->private_->integer_signal_mid_side_unaligned[i], &encoder->private_->integer_signal_mid_side[i]);
-		memset(encoder->private_->integer_signal_mid_side[i], 0, sizeof(FLAC__int32)*4);
-		encoder->private_->integer_signal_mid_side[i] += 4;
-#ifndef FLAC__INTEGER_ONLY_LIBRARY
-#if 0 /* @@@ currently unused */
-		if(encoder->protected_->max_lpc_order > 0)
-			ok = ok && FLAC__memory_alloc_aligned_real_array(new_blocksize+OVERREAD_, &encoder->private_->real_signal_mid_side_unaligned[i], &encoder->private_->real_signal_mid_side[i]);
-#endif
-#endif
-	}
-#ifndef FLAC__INTEGER_ONLY_LIBRARY
-	if(ok && encoder->protected_->max_lpc_order > 0) {
-		for(i = 0; ok && i < encoder->protected_->num_apodizations; i++)
-			ok = ok && FLAC__memory_alloc_aligned_real_array(new_blocksize, &encoder->private_->window_unaligned[i], &encoder->private_->window[i]);
-		ok = ok && FLAC__memory_alloc_aligned_real_array(new_blocksize, &encoder->private_->windowed_signal_unaligned, &encoder->private_->windowed_signal);
-	}
-#endif
-	for(channel = 0; ok && channel < encoder->protected_->channels; channel++) {
-		for(i = 0; ok && i < 2; i++) {
-			ok = ok && FLAC__memory_alloc_aligned_int32_array(new_blocksize, &encoder->private_->residual_workspace_unaligned[channel][i], &encoder->private_->residual_workspace[channel][i]);
+		/* WATCHOUT: FLAC__lpc_compute_residual_from_qlp_coefficients_asm_ia32_mmx() and ..._intrin_sse2()
+		 * require that the input arrays (in our case the integer signals)
+		 * have a buffer of up to 3 zeroes in front (at negative indices) for
+		 * alignment purposes; we use 4 in front to keep the data well-aligned.
+		 */
+
+		for(i = 0; ok && i < encoder->protected_->channels; i++) {
+			ok = ok && FLAC__memory_alloc_aligned_int32_array(new_blocksize+4+OVERREAD_, &encoder->private_->integer_signal_unaligned[i], &encoder->private_->integer_signal[i]);
+			if(ok) {
+				memset(encoder->private_->integer_signal[i], 0, sizeof(FLAC__int32)*4);
+				encoder->private_->integer_signal[i] += 4;
+			}
 		}
-	}
-	for(channel = 0; ok && channel < 2; channel++) {
 		for(i = 0; ok && i < 2; i++) {
-			ok = ok && FLAC__memory_alloc_aligned_int32_array(new_blocksize, &encoder->private_->residual_workspace_mid_side_unaligned[channel][i], &encoder->private_->residual_workspace_mid_side[channel][i]);
+			ok = ok && FLAC__memory_alloc_aligned_int32_array(new_blocksize+4+OVERREAD_, &encoder->private_->integer_signal_mid_side_unaligned[i], &encoder->private_->integer_signal_mid_side[i]);
+			if(ok) {
+				memset(encoder->private_->integer_signal_mid_side[i], 0, sizeof(FLAC__int32)*4);
+				encoder->private_->integer_signal_mid_side[i] += 4;
+			}
 		}
+		ok = ok && FLAC__memory_alloc_aligned_int64_array(new_blocksize+4+OVERREAD_, &encoder->private_->integer_signal_33bit_side_unaligned, &encoder->private_->integer_signal_33bit_side);
+#ifndef FLAC__INTEGER_ONLY_LIBRARY
+		if(ok && encoder->protected_->max_lpc_order > 0) {
+			for(i = 0; ok && i < encoder->protected_->num_apodizations; i++)
+				ok = ok && FLAC__memory_alloc_aligned_real_array(new_blocksize, &encoder->private_->window_unaligned[i], &encoder->private_->window[i]);
+			ok = ok && FLAC__memory_alloc_aligned_real_array(new_blocksize, &encoder->private_->windowed_signal_unaligned, &encoder->private_->windowed_signal);
+		}
+#endif
+		for(channel = 0; ok && channel < encoder->protected_->channels; channel++) {
+			for(i = 0; ok && i < 2; i++) {
+				ok = ok && FLAC__memory_alloc_aligned_int32_array(new_blocksize, &encoder->private_->residual_workspace_unaligned[channel][i], &encoder->private_->residual_workspace[channel][i]);
+			}
+		}
+
+
+		for(channel = 0; ok && channel < encoder->protected_->channels; channel++) {
+			for(i = 0; ok && i < 2; i++) {
+				ok = ok && FLAC__format_entropy_coding_method_partitioned_rice_contents_ensure_size(&encoder->private_->partitioned_rice_contents_workspace[channel][i], encoder->protected_->max_residual_partition_order);
+				ok = ok && FLAC__format_entropy_coding_method_partitioned_rice_contents_ensure_size(&encoder->private_->partitioned_rice_contents_workspace[channel][i], encoder->protected_->max_residual_partition_order);
+			}
+		}
+
+		for(channel = 0; ok && channel < 2; channel++) {
+			for(i = 0; ok && i < 2; i++) {
+				ok = ok && FLAC__memory_alloc_aligned_int32_array(new_blocksize, &encoder->private_->residual_workspace_mid_side_unaligned[channel][i], &encoder->private_->residual_workspace_mid_side[channel][i]);
+			}
+		}
+
+		for(channel = 0; ok && channel < 2; channel++) {
+			for(i = 0; ok && i < 2; i++) {
+				ok = ok && FLAC__format_entropy_coding_method_partitioned_rice_contents_ensure_size(&encoder->private_->partitioned_rice_contents_workspace_mid_side[channel][i], encoder->protected_->max_residual_partition_order);
+			}
+		}
+
+		for(i = 0; ok && i < 2; i++) {
+			ok = ok && FLAC__format_entropy_coding_method_partitioned_rice_contents_ensure_size(&encoder->private_->partitioned_rice_contents_extra[i], encoder->protected_->max_residual_partition_order);
+		}
+
+
+		/* the *2 is an approximation to the series 1 + 1/2 + 1/4 + ... that sums tree occupies in a flat array */
+		/*@@@ new_blocksize*2 is too pessimistic, but to fix, we need smarter logic because a smaller new_blocksize can actually increase the # of partitions; would require moving this out into a separate function, then checking its capacity against the need of the current blocksize&min/max_partition_order (and maybe predictor order) */
+		ok = ok && FLAC__memory_alloc_aligned_uint64_array(new_blocksize * 2, &encoder->private_->abs_residual_partition_sums_unaligned, &encoder->private_->abs_residual_partition_sums);
+		if(encoder->protected_->do_escape_coding)
+			ok = ok && FLAC__memory_alloc_aligned_unsigned_array(new_blocksize * 2, &encoder->private_->raw_bits_per_partition_unaligned, &encoder->private_->raw_bits_per_partition);
+}
+	if(ok)
+		encoder->private_->input_capacity = new_blocksize;
+	else {
+		encoder->protected_->state = FLAC__STREAM_ENCODER_MEMORY_ALLOCATION_ERROR;
+		return ok;
 	}
-	/* the *2 is an approximation to the series 1 + 1/2 + 1/4 + ... that sums tree occupies in a flat array */
-	/*@@@ new_blocksize*2 is too pessimistic, but to fix, we need smarter logic because a smaller new_blocksize can actually increase the # of partitions; would require moving this out into a separate function, then checking its capacity against the need of the current blocksize&min/max_partition_order (and maybe predictor order) */
-	ok = ok && FLAC__memory_alloc_aligned_uint64_array(new_blocksize * 2, &encoder->private_->abs_residual_partition_sums_unaligned, &encoder->private_->abs_residual_partition_sums);
-	if(encoder->protected_->do_escape_coding)
-		ok = ok && FLAC__memory_alloc_aligned_unsigned_array(new_blocksize * 2, &encoder->private_->raw_bits_per_partition_unaligned, &encoder->private_->raw_bits_per_partition);
+
 
 	/* now adjust the windows if the blocksize has changed */
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
-	if(ok && new_blocksize != encoder->private_->input_capacity && encoder->protected_->max_lpc_order > 0) {
-		for(i = 0; ok && i < encoder->protected_->num_apodizations; i++) {
+	if(encoder->protected_->max_lpc_order > 0 && new_blocksize > 1) {
+		for(i = 0; i < encoder->protected_->num_apodizations; i++) {
 			switch(encoder->protected_->apodizations[i].type) {
 				case FLAC__APODIZATION_BARTLETT:
 					FLAC__window_bartlett(encoder->private_->window[i], new_blocksize);
@@ -2519,6 +2682,9 @@
 				case FLAC__APODIZATION_PUNCHOUT_TUKEY:
 					FLAC__window_punchout_tukey(encoder->private_->window[i], new_blocksize, encoder->protected_->apodizations[i].parameters.multiple_tukey.p, encoder->protected_->apodizations[i].parameters.multiple_tukey.start, encoder->protected_->apodizations[i].parameters.multiple_tukey.end);
 					break;
+				case FLAC__APODIZATION_SUBDIVIDE_TUKEY:
+					FLAC__window_tukey(encoder->private_->window[i], new_blocksize, encoder->protected_->apodizations[i].parameters.tukey.p);
+					break;
 				case FLAC__APODIZATION_WELCH:
 					FLAC__window_welch(encoder->private_->window[i], new_blocksize);
 					break;
@@ -2530,17 +2696,17 @@
 			}
 		}
 	}
+	if (new_blocksize <= FLAC__MAX_LPC_ORDER) {
+		/* intrinsics autocorrelation routines do not all handle cases in which lag might be
+		 * larger than data_len. Lag is one larger than the LPC order */
+		encoder->private_->local_lpc_compute_autocorrelation = FLAC__lpc_compute_autocorrelation;
+	}
 #endif
 
-	if(ok)
-		encoder->private_->input_capacity = new_blocksize;
-	else
-		encoder->protected_->state = FLAC__STREAM_ENCODER_MEMORY_ALLOCATION_ERROR;
-
-	return ok;
+	return true;
 }
 
-FLAC__bool write_bitbuffer_(FLAC__StreamEncoder *encoder, unsigned samples, FLAC__bool is_last_block)
+FLAC__bool write_bitbuffer_(FLAC__StreamEncoder *encoder, uint32_t samples, FLAC__bool is_last_block)
 {
 	const FLAC__byte *buffer;
 	size_t bytes;
@@ -2559,7 +2725,10 @@
 			encoder->private_->verify.needs_magic_hack = true;
 		}
 		else {
-			if(!FLAC__stream_decoder_process_single(encoder->private_->verify.decoder)) {
+			if(!FLAC__stream_decoder_process_single(encoder->private_->verify.decoder)
+			    || (!is_last_block
+				    && (FLAC__stream_encoder_get_verify_decoder_state(encoder) == FLAC__STREAM_DECODER_END_OF_STREAM))
+			    || encoder->protected_->state == FLAC__STREAM_ENCODER_VERIFY_DECODER_ERROR /* Happens when error callback was used */) {
 				FLAC__bitwriter_release_buffer(encoder->private_->frame);
 				FLAC__bitwriter_clear(encoder->private_->frame);
 				if(encoder->protected_->state != FLAC__STREAM_ENCODER_VERIFY_MISMATCH_IN_AUDIO_DATA)
@@ -2587,7 +2756,7 @@
 	return true;
 }
 
-FLAC__StreamEncoderWriteStatus write_frame_(FLAC__StreamEncoder *encoder, const FLAC__byte buffer[], size_t bytes, unsigned samples, FLAC__bool is_last_block)
+FLAC__StreamEncoderWriteStatus write_frame_(FLAC__StreamEncoder *encoder, const FLAC__byte buffer[], size_t bytes, uint32_t samples, FLAC__bool is_last_block)
 {
 	FLAC__StreamEncoderWriteStatus status;
 	FLAC__uint64 output_position = 0;
@@ -2619,11 +2788,11 @@
 	 * frame yet)
 	 */
 	if(0 != encoder->private_->seek_table && encoder->protected_->audio_offset > 0 && encoder->private_->seek_table->num_points > 0) {
-		const unsigned blocksize = FLAC__stream_encoder_get_blocksize(encoder);
+		const uint32_t blocksize = FLAC__stream_encoder_get_blocksize(encoder);
 		const FLAC__uint64 frame_first_sample = encoder->private_->samples_written;
 		const FLAC__uint64 frame_last_sample = frame_first_sample + (FLAC__uint64)blocksize - 1;
 		FLAC__uint64 test_sample;
-		unsigned i;
+		uint32_t i;
 		for(i = encoder->private_->first_seekpoint_to_check; i < encoder->private_->seek_table->num_points; i++) {
 			test_sample = encoder->private_->seek_table->points[i].sample_number;
 			if(test_sample > frame_last_sample) {
@@ -2685,10 +2854,10 @@
 {
 	FLAC__byte b[flac_max(6u, FLAC__STREAM_METADATA_SEEKPOINT_LENGTH)];
 	const FLAC__StreamMetadata *metadata = &encoder->private_->streaminfo;
-	const FLAC__uint64 samples = metadata->data.stream_info.total_samples;
-	const unsigned min_framesize = metadata->data.stream_info.min_framesize;
-	const unsigned max_framesize = metadata->data.stream_info.max_framesize;
-	const unsigned bps = metadata->data.stream_info.bits_per_sample;
+	FLAC__uint64 samples = metadata->data.stream_info.total_samples;
+	const uint32_t min_framesize = metadata->data.stream_info.min_framesize;
+	const uint32_t max_framesize = metadata->data.stream_info.max_framesize;
+	const uint32_t bps = metadata->data.stream_info.bits_per_sample;
 	FLAC__StreamEncoderSeekStatus seek_status;
 
 	FLAC__ASSERT(metadata->type == FLAC__METADATA_TYPE_STREAMINFO);
@@ -2702,7 +2871,7 @@
 	 * Write MD5 signature
 	 */
 	{
-		const unsigned md5_offset =
+		const uint32_t md5_offset =
 			FLAC__STREAM_METADATA_HEADER_LENGTH +
 			(
 				FLAC__STREAM_METADATA_STREAMINFO_MIN_BLOCK_SIZE_LEN +
@@ -2730,7 +2899,7 @@
 	 * Write total samples
 	 */
 	{
-		const unsigned total_samples_byte_offset =
+		const uint32_t total_samples_byte_offset =
 			FLAC__STREAM_METADATA_HEADER_LENGTH +
 			(
 				FLAC__STREAM_METADATA_STREAMINFO_MIN_BLOCK_SIZE_LEN +
@@ -2742,6 +2911,8 @@
 				FLAC__STREAM_METADATA_STREAMINFO_BITS_PER_SAMPLE_LEN
 				- 4
 			) / 8;
+		if(samples > (FLAC__U64L(1) << FLAC__STREAM_METADATA_STREAMINFO_TOTAL_SAMPLES_LEN))
+			samples = 0;
 
 		b[0] = ((FLAC__byte)(bps-1) << 4) | (FLAC__byte)((samples >> 32) & 0x0F);
 		b[1] = (FLAC__byte)((samples >> 24) & 0xFF);
@@ -2763,7 +2934,7 @@
 	 * Write min/max framesize
 	 */
 	{
-		const unsigned min_framesize_offset =
+		const uint32_t min_framesize_offset =
 			FLAC__STREAM_METADATA_HEADER_LENGTH +
 			(
 				FLAC__STREAM_METADATA_STREAMINFO_MIN_BLOCK_SIZE_LEN +
@@ -2791,7 +2962,7 @@
 	 * Write seektable
 	 */
 	if(0 != encoder->private_->seek_table && encoder->private_->seek_table->num_points > 0 && encoder->protected_->seektable_offset > 0) {
-		unsigned i;
+		uint32_t i;
 
 		FLAC__format_seektable_sort(encoder->private_->seek_table);
 
@@ -2805,7 +2976,7 @@
 
 		for(i = 0; i < encoder->private_->seek_table->num_points; i++) {
 			FLAC__uint64 xx;
-			unsigned x;
+			uint32_t x;
 			xx = encoder->private_->seek_table->points[i].sample_number;
 			b[7] = (FLAC__byte)xx; xx >>= 8;
 			b[6] = (FLAC__byte)xx; xx >>= 8;
@@ -2840,7 +3011,7 @@
 void update_ogg_metadata_(FLAC__StreamEncoder *encoder)
 {
 	/* the # of bytes in the 1st packet that precede the STREAMINFO */
-	static const unsigned FIRST_OGG_PACKET_STREAMINFO_PREFIX_LENGTH =
+	static const uint32_t FIRST_OGG_PACKET_STREAMINFO_PREFIX_LENGTH =
 		FLAC__OGG_MAPPING_PACKET_TYPE_LENGTH +
 		FLAC__OGG_MAPPING_MAGIC_LENGTH +
 		FLAC__OGG_MAPPING_VERSION_MAJOR_LENGTH +
@@ -2851,8 +3022,8 @@
 	FLAC__byte b[flac_max(6u, FLAC__STREAM_METADATA_SEEKPOINT_LENGTH)];
 	const FLAC__StreamMetadata *metadata = &encoder->private_->streaminfo;
 	const FLAC__uint64 samples = metadata->data.stream_info.total_samples;
-	const unsigned min_framesize = metadata->data.stream_info.min_framesize;
-	const unsigned max_framesize = metadata->data.stream_info.max_framesize;
+	const uint32_t min_framesize = metadata->data.stream_info.min_framesize;
+	const uint32_t max_framesize = metadata->data.stream_info.max_framesize;
 	ogg_page page;
 
 	FLAC__ASSERT(metadata->type == FLAC__METADATA_TYPE_STREAMINFO);
@@ -2882,7 +3053,7 @@
 	 * Write MD5 signature
 	 */
 	{
-		const unsigned md5_offset =
+		const uint32_t md5_offset =
 			FIRST_OGG_PACKET_STREAMINFO_PREFIX_LENGTH +
 			FLAC__STREAM_METADATA_HEADER_LENGTH +
 			(
@@ -2896,7 +3067,7 @@
 				FLAC__STREAM_METADATA_STREAMINFO_TOTAL_SAMPLES_LEN
 			) / 8;
 
-		if(md5_offset + 16 > (unsigned)page.body_len) {
+		if(md5_offset + 16 > (uint32_t)page.body_len) {
 			encoder->protected_->state = FLAC__STREAM_ENCODER_OGG_ERROR;
 			simple_ogg_page__clear(&page);
 			return;
@@ -2908,7 +3079,7 @@
 	 * Write total samples
 	 */
 	{
-		const unsigned total_samples_byte_offset =
+		const uint32_t total_samples_byte_offset =
 			FIRST_OGG_PACKET_STREAMINFO_PREFIX_LENGTH +
 			FLAC__STREAM_METADATA_HEADER_LENGTH +
 			(
@@ -2922,7 +3093,7 @@
 				- 4
 			) / 8;
 
-		if(total_samples_byte_offset + 5 > (unsigned)page.body_len) {
+		if(total_samples_byte_offset + 5 > (uint32_t)page.body_len) {
 			encoder->protected_->state = FLAC__STREAM_ENCODER_OGG_ERROR;
 			simple_ogg_page__clear(&page);
 			return;
@@ -2940,7 +3111,7 @@
 	 * Write min/max framesize
 	 */
 	{
-		const unsigned min_framesize_offset =
+		const uint32_t min_framesize_offset =
 			FIRST_OGG_PACKET_STREAMINFO_PREFIX_LENGTH +
 			FLAC__STREAM_METADATA_HEADER_LENGTH +
 			(
@@ -2948,7 +3119,7 @@
 				FLAC__STREAM_METADATA_STREAMINFO_MAX_BLOCK_SIZE_LEN
 			) / 8;
 
-		if(min_framesize_offset + 6 > (unsigned)page.body_len) {
+		if(min_framesize_offset + 6 > (uint32_t)page.body_len) {
 			encoder->protected_->state = FLAC__STREAM_ENCODER_OGG_ERROR;
 			simple_ogg_page__clear(&page);
 			return;
@@ -2971,7 +3142,7 @@
 	 * Write seektable
 	 */
 	if(0 != encoder->private_->seek_table && encoder->private_->seek_table->num_points > 0 && encoder->protected_->seektable_offset > 0) {
-		unsigned i;
+		uint32_t i;
 		FLAC__byte *p;
 
 		FLAC__format_seektable_sort(encoder->private_->seek_table);
@@ -2984,7 +3155,7 @@
 			return; /* state already set */
 		}
 
-		if((FLAC__STREAM_METADATA_HEADER_LENGTH + 18*encoder->private_->seek_table->num_points) != (unsigned)page.body_len) {
+		if((FLAC__STREAM_METADATA_HEADER_LENGTH + 18*encoder->private_->seek_table->num_points) != (uint32_t)page.body_len) {
 			encoder->protected_->state = FLAC__STREAM_ENCODER_OGG_ERROR;
 			simple_ogg_page__clear(&page);
 			return;
@@ -2992,7 +3163,7 @@
 
 		for(i = 0, p = page.body + FLAC__STREAM_METADATA_HEADER_LENGTH; i < encoder->private_->seek_table->num_points; i++, p += 18) {
 			FLAC__uint64 xx;
-			unsigned x;
+			uint32_t x;
 			xx = encoder->private_->seek_table->points[i].sample_number;
 			b[7] = (FLAC__byte)xx; xx >>= 8;
 			b[6] = (FLAC__byte)xx; xx >>= 8;
@@ -3026,7 +3197,7 @@
 }
 #endif
 
-FLAC__bool process_frame_(FLAC__StreamEncoder *encoder, FLAC__bool is_fractional_block, FLAC__bool is_last_block)
+FLAC__bool process_frame_(FLAC__StreamEncoder *encoder, FLAC__bool is_last_block)
 {
 	FLAC__uint16 crc;
 	FLAC__ASSERT(encoder->protected_->state == FLAC__STREAM_ENCODER_OK);
@@ -3042,7 +3213,7 @@
 	/*
 	 * Process the frame header and subframes into the frame bitbuffer
 	 */
-	if(!process_subframes_(encoder, is_fractional_block)) {
+	if(!process_subframes_(encoder)) {
 		/* the above function sets the state for us in case of an error */
 		return false;
 	}
@@ -3085,22 +3256,18 @@
 	return true;
 }
 
-FLAC__bool process_subframes_(FLAC__StreamEncoder *encoder, FLAC__bool is_fractional_block)
+FLAC__bool process_subframes_(FLAC__StreamEncoder *encoder)
 {
 	FLAC__FrameHeader frame_header;
-	unsigned channel, min_partition_order = encoder->protected_->min_residual_partition_order, max_partition_order;
-	FLAC__bool do_independent, do_mid_side;
+	uint32_t channel, min_partition_order = encoder->protected_->min_residual_partition_order, max_partition_order;
+	FLAC__bool do_independent, do_mid_side, backup_disable_constant_subframes = encoder->private_->disable_constant_subframes, all_subframes_constant = true;
 
 	/*
 	 * Calculate the min,max Rice partition orders
 	 */
-	if(is_fractional_block) {
-		max_partition_order = 0;
-	}
-	else {
-		max_partition_order = FLAC__format_get_max_rice_partition_order_from_blocksize(encoder->protected_->blocksize);
-		max_partition_order = flac_min(max_partition_order, encoder->protected_->max_residual_partition_order);
-	}
+
+	max_partition_order = FLAC__format_get_max_rice_partition_order_from_blocksize(encoder->protected_->blocksize);
+	max_partition_order = flac_min(max_partition_order, encoder->protected_->max_residual_partition_order);
 	min_partition_order = flac_min(min_partition_order, max_partition_order);
 
 	/*
@@ -3145,7 +3312,10 @@
 	 */
 	if(do_independent) {
 		for(channel = 0; channel < encoder->protected_->channels; channel++) {
-			const unsigned w = get_wasted_bits_(encoder->private_->integer_signal[channel], encoder->protected_->blocksize);
+			uint32_t w = get_wasted_bits_(encoder->private_->integer_signal[channel], encoder->protected_->blocksize);
+			if (w > encoder->protected_->bits_per_sample) {
+				w = encoder->protected_->bits_per_sample;
+			}
 			encoder->private_->subframe_workspace[channel][0].wasted_bits = encoder->private_->subframe_workspace[channel][1].wasted_bits = w;
 			encoder->private_->subframe_bps[channel] = encoder->protected_->bits_per_sample - w;
 		}
@@ -3153,7 +3323,15 @@
 	if(do_mid_side) {
 		FLAC__ASSERT(encoder->protected_->channels == 2);
 		for(channel = 0; channel < 2; channel++) {
-			const unsigned w = get_wasted_bits_(encoder->private_->integer_signal_mid_side[channel], encoder->protected_->blocksize);
+			uint32_t w;
+			if(encoder->protected_->bits_per_sample < 32 || channel == 0)
+				w = get_wasted_bits_(encoder->private_->integer_signal_mid_side[channel], encoder->protected_->blocksize);
+			else
+				w = get_wasted_bits_wide_(encoder->private_->integer_signal_33bit_side, encoder->private_->integer_signal_mid_side[channel], encoder->protected_->blocksize);
+
+			if (w > encoder->protected_->bits_per_sample) {
+				w = encoder->protected_->bits_per_sample;
+			}
 			encoder->private_->subframe_workspace_mid_side[channel][0].wasted_bits = encoder->private_->subframe_workspace_mid_side[channel][1].wasted_bits = w;
 			encoder->private_->subframe_bps_mid_side[channel] = encoder->protected_->bits_per_sample - w + (channel==0? 0:1);
 		}
@@ -3164,6 +3342,12 @@
 	 */
 	if(do_independent) {
 		for(channel = 0; channel < encoder->protected_->channels; channel++) {
+			if(encoder->protected_->limit_min_bitrate && all_subframes_constant && (channel + 1) == encoder->protected_->channels){
+				/* This frame contains only constant subframes at this point.
+				 * To prevent the frame from becoming too small, make sure
+				 * the last subframe isn't constant */
+				encoder->private_->disable_constant_subframes = true;
+			}
 			if(!
 				process_subframe_(
 					encoder,
@@ -3180,6 +3364,8 @@
 				)
 			)
 				return false;
+			if(encoder->private_->subframe_workspace[channel][encoder->private_->best_subframe[channel]].type != FLAC__SUBFRAME_TYPE_CONSTANT)
+				all_subframes_constant = false;
 		}
 	}
 
@@ -3190,6 +3376,11 @@
 		FLAC__ASSERT(encoder->protected_->channels == 2);
 
 		for(channel = 0; channel < 2; channel++) {
+			void *integer_signal_;
+			if(encoder->private_->subframe_bps_mid_side[channel] <= 32)
+				integer_signal_ = encoder->private_->integer_signal_mid_side[channel];
+			else
+				integer_signal_ = encoder->private_->integer_signal_33bit_side;
 			if(!
 				process_subframe_(
 					encoder,
@@ -3197,7 +3388,7 @@
 					max_partition_order,
 					&frame_header,
 					encoder->private_->subframe_bps_mid_side[channel],
-					encoder->private_->integer_signal_mid_side[channel],
+					integer_signal_,
 					encoder->private_->subframe_workspace_ptr_mid_side[channel],
 					encoder->private_->partitioned_rice_contents_workspace_ptr_mid_side[channel],
 					encoder->private_->residual_workspace_mid_side[channel],
@@ -3213,7 +3404,7 @@
 	 * Compose the frame bitbuffer
 	 */
 	if(do_mid_side) {
-		unsigned left_bps = 0, right_bps = 0; /* initialized only to prevent superfluous compiler warning */
+		uint32_t left_bps = 0, right_bps = 0; /* initialized only to prevent superfluous compiler warning */
 		FLAC__Subframe *left_subframe = 0, *right_subframe = 0; /* initialized only to prevent superfluous compiler warning */
 		FLAC__ChannelAssignment channel_assignment;
 
@@ -3223,8 +3414,8 @@
 			channel_assignment = (encoder->private_->last_channel_assignment == FLAC__CHANNEL_ASSIGNMENT_INDEPENDENT? FLAC__CHANNEL_ASSIGNMENT_INDEPENDENT : FLAC__CHANNEL_ASSIGNMENT_MID_SIDE);
 		}
 		else {
-			unsigned bits[4]; /* WATCHOUT - indexed by FLAC__ChannelAssignment */
-			unsigned min_bits;
+			uint32_t bits[4]; /* WATCHOUT - indexed by FLAC__ChannelAssignment */
+			uint32_t min_bits;
 			int ca;
 
 			FLAC__ASSERT(FLAC__CHANNEL_ASSIGNMENT_INDEPENDENT == 0);
@@ -3241,7 +3432,11 @@
 
 			channel_assignment = FLAC__CHANNEL_ASSIGNMENT_INDEPENDENT;
 			min_bits = bits[channel_assignment];
-			for(ca = 1; ca <= 3; ca++) {
+
+			/* When doing loose mid-side stereo, ignore left-side
+			 * and right-side options */
+			ca = encoder->protected_->loose_mid_side_stereo ? 3 : 1;
+			for( ; ca <= 3; ca++) {
 				if(bits[ca] < min_bits) {
 					min_bits = bits[ca];
 					channel_assignment = (FLAC__ChannelAssignment)ca;
@@ -3325,55 +3520,101 @@
 	}
 
 	encoder->private_->last_channel_assignment = frame_header.channel_assignment;
+	encoder->private_->disable_constant_subframes = backup_disable_constant_subframes;
 
 	return true;
 }
 
+static inline void set_next_subdivide_tukey(FLAC__int32 parts, uint32_t * apodizations, uint32_t * current_depth, uint32_t * current_part){
+	// Advances the (*apodizations, *current_depth, *current_part) iteration state by one step; current_part is interleaved: even are partial, odd are punchout
+	if(*current_depth == 2){
+		// For depth 2, we only do partial, no punchout as that is almost redundant, so current_part steps 0 -> 2 and the odd value 1 is skipped
+		if(*current_part == 0){
+			*current_part = 2;
+		}else{ /* *current_part == 2 (assumed; any non-zero value takes this branch): go one depth deeper */
+			*current_part = 0;
+			(*current_depth)++;
+		}
+	}else if((*current_part) < (2*(*current_depth)-1)){
+		(*current_part)++; /* more parts remain at this depth */
+	}else{ /* (*current_part) >= (2*(*current_depth)-1): no parts remain at this depth */
+		*current_part = 0;
+		(*current_depth)++;
+	}
+
+	/* Now check if we are done with this SUBDIVIDE_TUKEY apodization: once the depth exceeds parts, move to the next apodization and reset depth and part */
+	if(*current_depth > (uint32_t) parts){
+		(*apodizations)++;
+		*current_depth = 1;
+		*current_part = 0;
+	}
+}
+
 FLAC__bool process_subframe_(
 	FLAC__StreamEncoder *encoder,
-	unsigned min_partition_order,
-	unsigned max_partition_order,
+	uint32_t min_partition_order,
+	uint32_t max_partition_order,
 	const FLAC__FrameHeader *frame_header,
-	unsigned subframe_bps,
-	const FLAC__int32 integer_signal[],
+	uint32_t subframe_bps,
+	const void *integer_signal,
 	FLAC__Subframe *subframe[2],
 	FLAC__EntropyCodingMethod_PartitionedRiceContents *partitioned_rice_contents[2],
 	FLAC__int32 *residual[2],
-	unsigned *best_subframe,
-	unsigned *best_bits
+	uint32_t *best_subframe,
+	uint32_t *best_bits
 )
 {
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
-	FLAC__float fixed_residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1];
+	float fixed_residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1];
 #else
 	FLAC__fixedpoint fixed_residual_bits_per_sample[FLAC__MAX_FIXED_ORDER+1];
 #endif
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
-	FLAC__double lpc_residual_bits_per_sample;
-	FLAC__real autoc[FLAC__MAX_LPC_ORDER+1]; /* WATCHOUT: the size is important even though encoder->protected_->max_lpc_order might be less; some asm and x86 intrinsic routines need all the space */
-	FLAC__double lpc_error[FLAC__MAX_LPC_ORDER];
-	unsigned min_lpc_order, max_lpc_order, lpc_order;
-	unsigned min_qlp_coeff_precision, max_qlp_coeff_precision, qlp_coeff_precision;
+	double lpc_residual_bits_per_sample;
+	double autoc[FLAC__MAX_LPC_ORDER+1]; /* WATCHOUT: the size is important even though encoder->protected_->max_lpc_order might be less; some asm and x86 intrinsic routines need all the space */
+	double autoc_root[FLAC__MAX_LPC_ORDER+1]; /* This is for subdivide_tukey apodization */
+	double lpc_error[FLAC__MAX_LPC_ORDER];
+	uint32_t min_lpc_order, max_lpc_order, lpc_order;
+	uint32_t min_qlp_coeff_precision, max_qlp_coeff_precision, qlp_coeff_precision;
 #endif
-	unsigned min_fixed_order, max_fixed_order, guess_fixed_order, fixed_order;
-	unsigned rice_parameter;
-	unsigned _candidate_bits, _best_bits;
-	unsigned _best_subframe;
+	uint32_t min_fixed_order, max_fixed_order, guess_fixed_order, fixed_order;
+	uint32_t _candidate_bits, _best_bits;
+	uint32_t _best_subframe;
 	/* only use RICE2 partitions if stream bps > 16 */
-	const unsigned rice_parameter_limit = FLAC__stream_encoder_get_bits_per_sample(encoder) > 16? FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_ESCAPE_PARAMETER : FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ESCAPE_PARAMETER;
+	const uint32_t rice_parameter_limit = FLAC__stream_encoder_get_bits_per_sample(encoder) > 16? FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_ESCAPE_PARAMETER : FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ESCAPE_PARAMETER;
 
 	FLAC__ASSERT(frame_header->blocksize > 0);
 
 	/* verbatim subframe is the baseline against which we measure other compressed subframes */
 	_best_subframe = 0;
 	if(encoder->private_->disable_verbatim_subframes && frame_header->blocksize >= FLAC__MAX_FIXED_ORDER)
-		_best_bits = UINT_MAX;
+		_best_bits = UINT32_MAX;
 	else
 		_best_bits = evaluate_verbatim_subframe_(encoder, integer_signal, frame_header->blocksize, subframe_bps, subframe[_best_subframe]);
+	*best_bits = _best_bits;
 
-	if(frame_header->blocksize >= FLAC__MAX_FIXED_ORDER) {
-		unsigned signal_is_constant = false;
-		guess_fixed_order = encoder->private_->local_fixed_compute_best_predictor(integer_signal+FLAC__MAX_FIXED_ORDER, frame_header->blocksize-FLAC__MAX_FIXED_ORDER, fixed_residual_bits_per_sample);
+	if(frame_header->blocksize > FLAC__MAX_FIXED_ORDER) {
+		uint32_t signal_is_constant = false;
+		/* The next formula determines when to use a 64-bit accumulator
+		 * for the error of a fixed predictor, and when a 32-bit one. As
+		 * the error of a 4th order predictor for a given sample is the
+		 * sum of 17 sample values (1+4+6+4+1) and there are blocksize -
+		 * order error values to be summed, the maximum total error is
+		 * maximum_sample_value * (blocksize - order) * 17. As ilog2(x)
+		 * calculates floor(log2(x)), the result must be 31 or lower.
+		 */
+		if(subframe_bps < 28){
+			if(subframe_bps + FLAC__bitmath_ilog2((frame_header->blocksize-FLAC__MAX_FIXED_ORDER)*17) < 32)
+				guess_fixed_order = encoder->private_->local_fixed_compute_best_predictor(((FLAC__int32 *)integer_signal)+FLAC__MAX_FIXED_ORDER, frame_header->blocksize-FLAC__MAX_FIXED_ORDER, fixed_residual_bits_per_sample);
+			else
+				guess_fixed_order = encoder->private_->local_fixed_compute_best_predictor_wide(((FLAC__int32 *)integer_signal)+FLAC__MAX_FIXED_ORDER, frame_header->blocksize-FLAC__MAX_FIXED_ORDER, fixed_residual_bits_per_sample);
+		}
+		else
+			if(subframe_bps <= 32)
+				guess_fixed_order = FLAC__fixed_compute_best_predictor_limit_residual(((FLAC__int32 *)integer_signal),frame_header->blocksize, fixed_residual_bits_per_sample);
+			else
+				guess_fixed_order = FLAC__fixed_compute_best_predictor_limit_residual_33bit(((FLAC__int64 *)integer_signal),frame_header->blocksize, fixed_residual_bits_per_sample);
+
 		/* check for constant subframe */
 		if(
 			!encoder->private_->disable_constant_subframes &&
@@ -3384,17 +3625,33 @@
 #endif
 		) {
 			/* the above means it's possible all samples are the same value; now double-check it: */
-			unsigned i;
+			uint32_t i;
 			signal_is_constant = true;
-			for(i = 1; i < frame_header->blocksize; i++) {
-				if(integer_signal[0] != integer_signal[i]) {
-					signal_is_constant = false;
-					break;
+			if(subframe_bps <= 32){
+				const FLAC__int32 *integer_signal_ = integer_signal;
+				for(i = 1; i < frame_header->blocksize; i++) {
+					if(integer_signal_[0] != integer_signal_[i]) {
+						signal_is_constant = false;
+						break;
+					}
+				}
+			}
+			else {
+				const FLAC__int64 *integer_signal_ = integer_signal;
+				for(i = 1; i < frame_header->blocksize; i++) {
+					if(integer_signal_[0] != integer_signal_[i]) {
+						signal_is_constant = false;
+						break;
+					}
 				}
 			}
 		}
 		if(signal_is_constant) {
-			_candidate_bits = evaluate_constant_subframe_(encoder, integer_signal[0], frame_header->blocksize, subframe_bps, subframe[!_best_subframe]);
+			if(subframe_bps <= 32)
+				_candidate_bits = evaluate_constant_subframe_(encoder, ((FLAC__int32 *)integer_signal)[0], frame_header->blocksize, subframe_bps, subframe[!_best_subframe]);
+			else
+				_candidate_bits = evaluate_constant_subframe_(encoder, ((FLAC__int64 *)integer_signal)[0], frame_header->blocksize, subframe_bps, subframe[!_best_subframe]);
+
 			if(_candidate_bits < _best_bits) {
 				_best_subframe = !_best_subframe;
 				_best_bits = _candidate_bits;
@@ -3414,21 +3671,12 @@
 					max_fixed_order = frame_header->blocksize - 1;
 				for(fixed_order = min_fixed_order; fixed_order <= max_fixed_order; fixed_order++) {
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
-					if(fixed_residual_bits_per_sample[fixed_order] >= (FLAC__float)subframe_bps)
+					if(fixed_residual_bits_per_sample[fixed_order] >= (float)subframe_bps)
 						continue; /* don't even try */
-					rice_parameter = (fixed_residual_bits_per_sample[fixed_order] > 0.0)? (unsigned)(fixed_residual_bits_per_sample[fixed_order]+0.5) : 0; /* 0.5 is for rounding */
 #else
 					if(FLAC__fixedpoint_trunc(fixed_residual_bits_per_sample[fixed_order]) >= (int)subframe_bps)
 						continue; /* don't even try */
-					rice_parameter = (fixed_residual_bits_per_sample[fixed_order] > FLAC__FP_ZERO)? (unsigned)FLAC__fixedpoint_trunc(fixed_residual_bits_per_sample[fixed_order]+FLAC__FP_ONE_HALF) : 0; /* 0.5 is for rounding */
 #endif
-					rice_parameter++; /* to account for the signed->unsigned conversion during rice coding */
-					if(rice_parameter >= rice_parameter_limit) {
-#ifdef DEBUG_VERBOSE
-						fprintf(stderr, "clipping rice_parameter (%u -> %u) @0\n", rice_parameter, rice_parameter_limit - 1);
-#endif
-						rice_parameter = rice_parameter_limit - 1;
-					}
 					_candidate_bits =
 						evaluate_fixed_subframe_(
 							encoder,
@@ -3439,7 +3687,6 @@
 							frame_header->blocksize,
 							subframe_bps,
 							fixed_order,
-							rice_parameter,
 							rice_parameter_limit,
 							min_partition_order,
 							max_partition_order,
@@ -3463,21 +3710,65 @@
 				else
 					max_lpc_order = encoder->protected_->max_lpc_order;
 				if(max_lpc_order > 0) {
-					unsigned a;
-					for (a = 0; a < encoder->protected_->num_apodizations; a++) {
-						FLAC__lpc_window_data(integer_signal, encoder->private_->window[a], encoder->private_->windowed_signal, frame_header->blocksize);
-						encoder->private_->local_lpc_compute_autocorrelation(encoder->private_->windowed_signal, frame_header->blocksize, max_lpc_order+1, autoc);
+					uint32_t a, b = 1, c = 0;
+					for (a = 0; a < encoder->protected_->num_apodizations;) {
+						uint32_t max_lpc_order_this_apodization = max_lpc_order;
+						if(b == 1){
+							/* window full subblock */
+							if(subframe_bps <= 32)
+								FLAC__lpc_window_data(integer_signal, encoder->private_->window[a], encoder->private_->windowed_signal, frame_header->blocksize);
+							else
+								FLAC__lpc_window_data_wide(integer_signal, encoder->private_->window[a], encoder->private_->windowed_signal, frame_header->blocksize);
+							encoder->private_->local_lpc_compute_autocorrelation(encoder->private_->windowed_signal, frame_header->blocksize, max_lpc_order_this_apodization+1, autoc);
+							if(encoder->protected_->apodizations[a].type == FLAC__APODIZATION_SUBDIVIDE_TUKEY){
+								uint32_t i;
+								for(i = 0; i < max_lpc_order_this_apodization; i++)
+									autoc_root[i] = autoc[i];
+								b++;
+							}else{
+								a++;
+							}
+						}
+						else {
+							/* window part of subblock */
+							if(frame_header->blocksize/b <= FLAC__MAX_LPC_ORDER) {
+								/* intrinsics autocorrelation routines do not all handle cases in which lag might be
+								 * larger than data_len, and some routines round lag up to the nearest multiple of 4.
+								 * As little gain is expected from using LPC on part of a signal as small as 32 samples
+								 * and to enable widening this rounding up to larger values in the future, windowing
+								 * parts smaller than or equal to FLAC__MAX_LPC_ORDER (which is 32) samples is not supported */
+								set_next_subdivide_tukey(encoder->protected_->apodizations[a].parameters.subdivide_tukey.parts, &a, &b, &c);
+								continue;
+							}
+							if(!(c % 2)){
+								/* on even c, evaluate the (c/2)th partial window of size blocksize/b  */
+								if(subframe_bps <= 32)
+									FLAC__lpc_window_data_partial(integer_signal, encoder->private_->window[a], encoder->private_->windowed_signal, frame_header->blocksize, frame_header->blocksize/b/2, (c/2*frame_header->blocksize)/b);
+								else
+									FLAC__lpc_window_data_partial(integer_signal, encoder->private_->window[a], encoder->private_->windowed_signal, frame_header->blocksize, frame_header->blocksize/b/2, (c/2*frame_header->blocksize)/b);
+								encoder->private_->local_lpc_compute_autocorrelation(encoder->private_->windowed_signal, frame_header->blocksize/b, max_lpc_order_this_apodization+1, autoc);
+							}else{
+								/* on odd c, evaluate the root window (over the whole block) minus the previous partial window;
+								 * similar to tukey_punchout apodization but more efficient */
+								uint32_t i;
+								for(i = 0; i < max_lpc_order_this_apodization; i++)
+									autoc[i] = autoc_root[i] - autoc[i];
+							}
+							/* Next function sets a, b and c appropriate for next iteration */
+							set_next_subdivide_tukey(encoder->protected_->apodizations[a].parameters.subdivide_tukey.parts, &a, &b, &c);
+						}
+
 						/* if autoc[0] == 0.0, the signal is constant and we usually won't get here, but it can happen */
 						if(autoc[0] != 0.0) {
-							FLAC__lpc_compute_lp_coefficients(autoc, &max_lpc_order, encoder->private_->lp_coeff, lpc_error);
+							FLAC__lpc_compute_lp_coefficients(autoc, &max_lpc_order_this_apodization, encoder->private_->lp_coeff, lpc_error);
 							if(encoder->protected_->do_exhaustive_model_search) {
 								min_lpc_order = 1;
 							}
 							else {
-								const unsigned guess_lpc_order =
+								const uint32_t guess_lpc_order =
 									FLAC__lpc_compute_best_order(
 										lpc_error,
-										max_lpc_order,
+										max_lpc_order_this_apodization,
 										frame_header->blocksize,
 										subframe_bps + (
 											encoder->protected_->do_qlp_coeff_prec_search?
@@ -3485,26 +3776,18 @@
 												encoder->protected_->qlp_coeff_precision
 										)
 									);
-								min_lpc_order = max_lpc_order = guess_lpc_order;
+								min_lpc_order = max_lpc_order_this_apodization = guess_lpc_order;
 							}
-							if(max_lpc_order >= frame_header->blocksize)
-								max_lpc_order = frame_header->blocksize - 1;
-							for(lpc_order = min_lpc_order; lpc_order <= max_lpc_order; lpc_order++) {
+							if(max_lpc_order_this_apodization >= frame_header->blocksize)
+								max_lpc_order_this_apodization = frame_header->blocksize - 1;
+							for(lpc_order = min_lpc_order; lpc_order <= max_lpc_order_this_apodization; lpc_order++) {
 								lpc_residual_bits_per_sample = FLAC__lpc_compute_expected_bits_per_residual_sample(lpc_error[lpc_order-1], frame_header->blocksize-lpc_order);
-								if(lpc_residual_bits_per_sample >= (FLAC__double)subframe_bps)
+								if(lpc_residual_bits_per_sample >= (double)subframe_bps)
 									continue; /* don't even try */
-								rice_parameter = (lpc_residual_bits_per_sample > 0.0)? (unsigned)(lpc_residual_bits_per_sample+0.5) : 0; /* 0.5 is for rounding */
-								rice_parameter++; /* to account for the signed->unsigned conversion during rice coding */
-								if(rice_parameter >= rice_parameter_limit) {
-#ifdef DEBUG_VERBOSE
-									fprintf(stderr, "clipping rice_parameter (%u -> %u) @1\n", rice_parameter, rice_parameter_limit - 1);
-#endif
-									rice_parameter = rice_parameter_limit - 1;
-								}
 								if(encoder->protected_->do_qlp_coeff_prec_search) {
 									min_qlp_coeff_precision = FLAC__MIN_QLP_COEFF_PRECISION;
-									/* try to keep qlp coeff precision such that only 32-bit math is required for decode of <=16bps streams */
-									if(subframe_bps <= 16) {
+									/* try to keep qlp coeff precision such that only 32-bit math is required for decode of <=16bps(+1bps for side channel) streams */
+									if(subframe_bps <= 17) {
 										max_qlp_coeff_precision = flac_min(32 - subframe_bps - FLAC__bitmath_ilog2(lpc_order), FLAC__MAX_QLP_COEFF_PRECISION);
 										max_qlp_coeff_precision = flac_max(max_qlp_coeff_precision, min_qlp_coeff_precision);
 									}
@@ -3527,7 +3810,6 @@
 											subframe_bps,
 											lpc_order,
 											qlp_coeff_precision,
-											rice_parameter,
 											rice_parameter_limit,
 											min_partition_order,
 											max_partition_order,
@@ -3553,7 +3835,7 @@
 	}
 
 	/* under rare circumstances this can happen when all but lpc subframe types are disabled: */
-	if(_best_bits == UINT_MAX) {
+	if(_best_bits == UINT32_MAX) {
 		FLAC__ASSERT(_best_subframe == 0);
 		_best_bits = evaluate_verbatim_subframe_(encoder, integer_signal, frame_header->blocksize, subframe_bps, subframe[_best_subframe]);
 	}
@@ -3566,8 +3848,8 @@
 
 FLAC__bool add_subframe_(
 	FLAC__StreamEncoder *encoder,
-	unsigned blocksize,
-	unsigned subframe_bps,
+	uint32_t blocksize,
+	uint32_t subframe_bps,
 	const FLAC__Subframe *subframe,
 	FLAC__BitWriter *frame
 )
@@ -3608,10 +3890,10 @@
 #if SPOTCHECK_ESTIMATE
 static void spotcheck_subframe_estimate_(
 	FLAC__StreamEncoder *encoder,
-	unsigned blocksize,
-	unsigned subframe_bps,
+	uint32_t blocksize,
+	uint32_t subframe_bps,
 	const FLAC__Subframe *subframe,
-	unsigned estimate
+	uint32_t estimate
 )
 {
 	FLAC__bool ret;
@@ -3627,7 +3909,7 @@
 	ret = add_subframe_(encoder, blocksize, subframe_bps, subframe, frame);
 	FLAC__ASSERT(ret);
 	{
-		const unsigned actual = FLAC__bitwriter_get_input_bits_unconsumed(frame);
+		const uint32_t actual = FLAC__bitwriter_get_input_bits_unconsumed(frame);
 		if(estimate != actual)
 			fprintf(stderr, "EST: bad, frame#%u sub#%%d type=%8s est=%u, actual=%u, delta=%d\n", encoder->private_->current_frame_number, FLAC__SubframeTypeString[subframe->type], estimate, actual, (int)actual-(int)estimate);
 	}
@@ -3635,15 +3917,15 @@
 }
 #endif
 
-unsigned evaluate_constant_subframe_(
+uint32_t evaluate_constant_subframe_(
 	FLAC__StreamEncoder *encoder,
-	const FLAC__int32 signal,
-	unsigned blocksize,
-	unsigned subframe_bps,
+	const FLAC__int64 signal,
+	uint32_t blocksize,
+	uint32_t subframe_bps,
 	FLAC__Subframe *subframe
 )
 {
-	unsigned estimate;
+	uint32_t estimate;
 	subframe->type = FLAC__SUBFRAME_TYPE_CONSTANT;
 	subframe->data.constant.value = signal;
 
@@ -3658,29 +3940,33 @@
 	return estimate;
 }
 
-unsigned evaluate_fixed_subframe_(
+uint32_t evaluate_fixed_subframe_(
 	FLAC__StreamEncoder *encoder,
-	const FLAC__int32 signal[],
+	const void *signal,
 	FLAC__int32 residual[],
 	FLAC__uint64 abs_residual_partition_sums[],
-	unsigned raw_bits_per_partition[],
-	unsigned blocksize,
-	unsigned subframe_bps,
-	unsigned order,
-	unsigned rice_parameter,
-	unsigned rice_parameter_limit,
-	unsigned min_partition_order,
-	unsigned max_partition_order,
+	uint32_t raw_bits_per_partition[],
+	uint32_t blocksize,
+	uint32_t subframe_bps,
+	uint32_t order,
+	uint32_t rice_parameter_limit,
+	uint32_t min_partition_order,
+	uint32_t max_partition_order,
 	FLAC__bool do_escape_coding,
-	unsigned rice_parameter_search_dist,
+	uint32_t rice_parameter_search_dist,
 	FLAC__Subframe *subframe,
 	FLAC__EntropyCodingMethod_PartitionedRiceContents *partitioned_rice_contents
 )
 {
-	unsigned i, residual_bits, estimate;
-	const unsigned residual_samples = blocksize - order;
+	uint32_t i, residual_bits, estimate;
+	const uint32_t residual_samples = blocksize - order;
 
-	FLAC__fixed_compute_residual(signal+order, residual_samples, order, residual);
+	if((subframe_bps + order) <= 32)
+		FLAC__fixed_compute_residual(((FLAC__int32 *)signal)+order, residual_samples, order, residual);
+	else if(subframe_bps <= 32)
+		FLAC__fixed_compute_residual_wide(((FLAC__int32 *)signal)+order, residual_samples, order, residual);
+	else
+		FLAC__fixed_compute_residual_wide_33bit(((FLAC__int64 *)signal)+order, residual_samples, order, residual);
 
 	subframe->type = FLAC__SUBFRAME_TYPE_FIXED;
 
@@ -3696,7 +3982,6 @@
 			raw_bits_per_partition,
 			residual_samples,
 			order,
-			rice_parameter,
 			rice_parameter_limit,
 			min_partition_order,
 			max_partition_order,
@@ -3707,10 +3992,18 @@
 		);
 
 	subframe->data.fixed.order = order;
-	for(i = 0; i < order; i++)
-		subframe->data.fixed.warmup[i] = signal[i];
+	if(subframe_bps <= 32)
+		for(i = 0; i < order; i++)
+			subframe->data.fixed.warmup[i] = ((FLAC__int32 *)signal)[i];
+	else
+		for(i = 0; i < order; i++)
+			subframe->data.fixed.warmup[i] = ((FLAC__int64 *)signal)[i];
 
-	estimate = FLAC__SUBFRAME_ZERO_PAD_LEN + FLAC__SUBFRAME_TYPE_LEN + FLAC__SUBFRAME_WASTED_BITS_FLAG_LEN + subframe->wasted_bits + (order * subframe_bps) + residual_bits;
+	estimate = FLAC__SUBFRAME_ZERO_PAD_LEN + FLAC__SUBFRAME_TYPE_LEN + FLAC__SUBFRAME_WASTED_BITS_FLAG_LEN + subframe->wasted_bits + (order * subframe_bps);
+	if(residual_bits < UINT32_MAX - estimate) // To make sure estimate doesn't overflow
+		estimate += residual_bits;
+	else
+		estimate = UINT32_MAX;
 
 #if SPOTCHECK_ESTIMATE
 	spotcheck_subframe_estimate_(encoder, blocksize, subframe_bps, subframe, estimate);
@@ -3720,34 +4013,33 @@
 }
 
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
-unsigned evaluate_lpc_subframe_(
+uint32_t evaluate_lpc_subframe_(
 	FLAC__StreamEncoder *encoder,
-	const FLAC__int32 signal[],
+	const void *signal,
 	FLAC__int32 residual[],
 	FLAC__uint64 abs_residual_partition_sums[],
-	unsigned raw_bits_per_partition[],
+	uint32_t raw_bits_per_partition[],
 	const FLAC__real lp_coeff[],
-	unsigned blocksize,
-	unsigned subframe_bps,
-	unsigned order,
-	unsigned qlp_coeff_precision,
-	unsigned rice_parameter,
-	unsigned rice_parameter_limit,
-	unsigned min_partition_order,
-	unsigned max_partition_order,
+	uint32_t blocksize,
+	uint32_t subframe_bps,
+	uint32_t order,
+	uint32_t qlp_coeff_precision,
+	uint32_t rice_parameter_limit,
+	uint32_t min_partition_order,
+	uint32_t max_partition_order,
 	FLAC__bool do_escape_coding,
-	unsigned rice_parameter_search_dist,
+	uint32_t rice_parameter_search_dist,
 	FLAC__Subframe *subframe,
 	FLAC__EntropyCodingMethod_PartitionedRiceContents *partitioned_rice_contents
 )
 {
 	FLAC__int32 qlp_coeff[FLAC__MAX_LPC_ORDER]; /* WATCHOUT: the size is important; some x86 intrinsic routines need more than lpc order elements */
-	unsigned i, residual_bits, estimate;
+	uint32_t i, residual_bits, estimate;
 	int quantization, ret;
-	const unsigned residual_samples = blocksize - order;
+	const uint32_t residual_samples = blocksize - order;
 
-	/* try to keep qlp coeff precision such that only 32-bit math is required for decode of <=16bps streams */
-	if(subframe_bps <= 16) {
+	/* try to keep qlp coeff precision such that only 32-bit math is required for decode of <=16bps(+1bps for side channel) streams */
+	if(subframe_bps <= 17) {
 		FLAC__ASSERT(order > 0);
 		FLAC__ASSERT(order <= FLAC__MAX_LPC_ORDER);
 		qlp_coeff_precision = flac_min(qlp_coeff_precision, 32 - subframe_bps - FLAC__bitmath_ilog2(order));
@@ -3757,13 +4049,23 @@
 	if(ret != 0)
 		return 0; /* this is a hack to indicate to the caller that we can't do lp at this order on this subframe */
 
-	if(subframe_bps + qlp_coeff_precision + FLAC__bitmath_ilog2(order) <= 32)
-		if(subframe_bps <= 16 && qlp_coeff_precision <= 16)
-			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit(signal+order, residual_samples, qlp_coeff, order, quantization, residual);
+	if(FLAC__lpc_max_residual_bps(subframe_bps, qlp_coeff, order, quantization) > 32) {
+		if(subframe_bps <= 32){
+			if(!FLAC__lpc_compute_residual_from_qlp_coefficients_limit_residual(((FLAC__int32 *)signal)+order, residual_samples, qlp_coeff, order, quantization, residual))
+				return 0;
+		}
 		else
-			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients(signal+order, residual_samples, qlp_coeff, order, quantization, residual);
+			if(!FLAC__lpc_compute_residual_from_qlp_coefficients_limit_residual_33bit(((FLAC__int64 *)signal)+order, residual_samples, qlp_coeff, order, quantization, residual))
+				return 0;
+	}
 	else
-		encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_64bit(signal+order, residual_samples, qlp_coeff, order, quantization, residual);
+		if(FLAC__lpc_max_prediction_before_shift_bps(subframe_bps, qlp_coeff, order) <= 32)
+			if(subframe_bps <= 16 && qlp_coeff_precision <= 16)
+				encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_16bit(((FLAC__int32 *)signal)+order, residual_samples, qlp_coeff, order, quantization, residual);
+			else
+				encoder->private_->local_lpc_compute_residual_from_qlp_coefficients(((FLAC__int32 *)signal)+order, residual_samples, qlp_coeff, order, quantization, residual);
+		else
+			encoder->private_->local_lpc_compute_residual_from_qlp_coefficients_64bit(((FLAC__int32 *)signal)+order, residual_samples, qlp_coeff, order, quantization, residual);
 
 	subframe->type = FLAC__SUBFRAME_TYPE_LPC;
 
@@ -3779,7 +4081,6 @@
 			raw_bits_per_partition,
 			residual_samples,
 			order,
-			rice_parameter,
 			rice_parameter_limit,
 			min_partition_order,
 			max_partition_order,
@@ -3793,10 +4094,19 @@
 	subframe->data.lpc.qlp_coeff_precision = qlp_coeff_precision;
 	subframe->data.lpc.quantization_level = quantization;
 	memcpy(subframe->data.lpc.qlp_coeff, qlp_coeff, sizeof(FLAC__int32)*FLAC__MAX_LPC_ORDER);
-	for(i = 0; i < order; i++)
-		subframe->data.lpc.warmup[i] = signal[i];
+	if(subframe_bps <= 32)
+		for(i = 0; i < order; i++)
+			subframe->data.lpc.warmup[i] = ((FLAC__int32 *)signal)[i];
+	else
+		for(i = 0; i < order; i++)
+			subframe->data.lpc.warmup[i] = ((FLAC__int64 *)signal)[i];
 
-	estimate = FLAC__SUBFRAME_ZERO_PAD_LEN + FLAC__SUBFRAME_TYPE_LEN + FLAC__SUBFRAME_WASTED_BITS_FLAG_LEN + subframe->wasted_bits + FLAC__SUBFRAME_LPC_QLP_COEFF_PRECISION_LEN + FLAC__SUBFRAME_LPC_QLP_SHIFT_LEN + (order * (qlp_coeff_precision + subframe_bps)) + residual_bits;
+
+	estimate = FLAC__SUBFRAME_ZERO_PAD_LEN + FLAC__SUBFRAME_TYPE_LEN + FLAC__SUBFRAME_WASTED_BITS_FLAG_LEN + subframe->wasted_bits + FLAC__SUBFRAME_LPC_QLP_COEFF_PRECISION_LEN + FLAC__SUBFRAME_LPC_QLP_SHIFT_LEN + (order * (qlp_coeff_precision + subframe_bps));
+	if(residual_bits < UINT32_MAX - estimate) // To make sure estimate doesn't overflow
+		estimate += residual_bits;
+	else
+		estimate = UINT32_MAX;
 
 #if SPOTCHECK_ESTIMATE
 	spotcheck_subframe_estimate_(encoder, blocksize, subframe_bps, subframe, estimate);
@@ -3806,19 +4116,26 @@
 }
 #endif
 
-unsigned evaluate_verbatim_subframe_(
+uint32_t evaluate_verbatim_subframe_(
 	FLAC__StreamEncoder *encoder,
-	const FLAC__int32 signal[],
-	unsigned blocksize,
-	unsigned subframe_bps,
+	const void *signal,
+	uint32_t blocksize,
+	uint32_t subframe_bps,
 	FLAC__Subframe *subframe
 )
 {
-	unsigned estimate;
+	uint32_t estimate;
 
 	subframe->type = FLAC__SUBFRAME_TYPE_VERBATIM;
 
-	subframe->data.verbatim.data = signal;
+	if(subframe_bps <= 32){
+		subframe->data.verbatim.data_type = FLAC__VERBATIM_SUBFRAME_DATA_TYPE_INT32;
+		subframe->data.verbatim.data.int32 = signal;
+	}
+	else {
+		subframe->data.verbatim.data_type = FLAC__VERBATIM_SUBFRAME_DATA_TYPE_INT64;
+		subframe->data.verbatim.data.int64 = signal;
+	}
 
 	estimate = FLAC__SUBFRAME_ZERO_PAD_LEN + FLAC__SUBFRAME_TYPE_LEN + FLAC__SUBFRAME_WASTED_BITS_FLAG_LEN + subframe->wasted_bits + (blocksize * subframe_bps);
 
@@ -3831,27 +4148,26 @@
 	return estimate;
 }
 
-unsigned find_best_partition_order_(
+uint32_t find_best_partition_order_(
 	FLAC__StreamEncoderPrivate *private_,
 	const FLAC__int32 residual[],
 	FLAC__uint64 abs_residual_partition_sums[],
-	unsigned raw_bits_per_partition[],
-	unsigned residual_samples,
-	unsigned predictor_order,
-	unsigned rice_parameter,
-	unsigned rice_parameter_limit,
-	unsigned min_partition_order,
-	unsigned max_partition_order,
-	unsigned bps,
+	uint32_t raw_bits_per_partition[],
+	uint32_t residual_samples,
+	uint32_t predictor_order,
+	uint32_t rice_parameter_limit,
+	uint32_t min_partition_order,
+	uint32_t max_partition_order,
+	uint32_t bps,
 	FLAC__bool do_escape_coding,
-	unsigned rice_parameter_search_dist,
+	uint32_t rice_parameter_search_dist,
 	FLAC__EntropyCodingMethod *best_ecm
 )
 {
-	unsigned residual_bits, best_residual_bits = 0;
-	unsigned best_parameters_index = 0;
-	unsigned best_partition_order = 0;
-	const unsigned blocksize = residual_samples + predictor_order;
+	uint32_t residual_bits, best_residual_bits = 0;
+	uint32_t best_parameters_index = 0;
+	uint32_t best_partition_order = 0;
+	const uint32_t blocksize = residual_samples + predictor_order;
 
 	max_partition_order = FLAC__format_get_max_rice_partition_order_from_blocksize_limited_max_and_predictor_order(max_partition_order, blocksize, predictor_order);
 	min_partition_order = flac_min(min_partition_order, max_partition_order);
@@ -3863,7 +4179,7 @@
 
 	{
 		int partition_order;
-		unsigned sum;
+		uint32_t sum;
 
 		for(partition_order = (int)max_partition_order, sum = 0; partition_order >= (int)min_partition_order; partition_order--) {
 			if(!
@@ -3875,10 +4191,9 @@
 					raw_bits_per_partition+sum,
 					residual_samples,
 					predictor_order,
-					rice_parameter,
 					rice_parameter_limit,
 					rice_parameter_search_dist,
-					(unsigned)partition_order,
+					(uint32_t)partition_order,
 					do_escape_coding,
 					&private_->partitioned_rice_contents_extra[!best_parameters_index],
 					&residual_bits
@@ -3905,13 +4220,12 @@
 		 * knowledge; it is const to the outside world.
 		 */
 		FLAC__EntropyCodingMethod_PartitionedRiceContents* prc = (FLAC__EntropyCodingMethod_PartitionedRiceContents*)best_ecm->data.partitioned_rice.contents;
-		unsigned partition;
+		uint32_t partition;
 
 		/* save best parameters and raw_bits */
-		FLAC__format_entropy_coding_method_partitioned_rice_contents_ensure_size(prc, flac_max(6u, best_partition_order));
-		memcpy(prc->parameters, private_->partitioned_rice_contents_extra[best_parameters_index].parameters, sizeof(unsigned)*(1<<(best_partition_order)));
+		memcpy(prc->parameters, private_->partitioned_rice_contents_extra[best_parameters_index].parameters, (uint32_t)sizeof(uint32_t)*(1<<(best_partition_order)));
 		if(do_escape_coding)
-			memcpy(prc->raw_bits, private_->partitioned_rice_contents_extra[best_parameters_index].raw_bits, sizeof(unsigned)*(1<<(best_partition_order)));
+			memcpy(prc->raw_bits, private_->partitioned_rice_contents_extra[best_parameters_index].raw_bits, (uint32_t)sizeof(uint32_t)*(1<<(best_partition_order)));
 		/*
 		 * Now need to check if the type should be changed to
 		 * FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2 based on the
@@ -3931,53 +4245,49 @@
 void precompute_partition_info_sums_(
 	const FLAC__int32 residual[],
 	FLAC__uint64 abs_residual_partition_sums[],
-	unsigned residual_samples,
-	unsigned predictor_order,
-	unsigned min_partition_order,
-	unsigned max_partition_order,
-	unsigned bps
+	uint32_t residual_samples,
+	uint32_t predictor_order,
+	uint32_t min_partition_order,
+	uint32_t max_partition_order,
+	uint32_t bps
 )
 {
-	const unsigned default_partition_samples = (residual_samples + predictor_order) >> max_partition_order;
-	unsigned partitions = 1u << max_partition_order;
+	const uint32_t default_partition_samples = (residual_samples + predictor_order) >> max_partition_order;
+	uint32_t partitions = 1u << max_partition_order;
 
 	FLAC__ASSERT(default_partition_samples > predictor_order);
 
 	/* first do max_partition_order */
 	{
-		unsigned partition, residual_sample, end = (unsigned)(-(int)predictor_order);
-		/* WATCHOUT: "+ bps + FLAC__MAX_EXTRA_RESIDUAL_BPS" is the maximum
-		 * assumed size of the average residual magnitude */
-		if(FLAC__bitmath_ilog2(default_partition_samples) + bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < 32) {
-			FLAC__uint32 abs_residual_partition_sum;
-
+		const uint32_t threshold = 32 - FLAC__bitmath_ilog2(default_partition_samples);
+		uint32_t partition, residual_sample, end = (uint32_t)(-(int)predictor_order);
+		/* WATCHOUT: "bps + FLAC__MAX_EXTRA_RESIDUAL_BPS" is the maximum assumed size of the average residual magnitude */
+		if(bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < threshold) {
 			for(partition = residual_sample = 0; partition < partitions; partition++) {
+				FLAC__uint32 abs_residual_partition_sum = 0;
 				end += default_partition_samples;
-				abs_residual_partition_sum = 0;
 				for( ; residual_sample < end; residual_sample++)
 					abs_residual_partition_sum += abs(residual[residual_sample]); /* abs(INT_MIN) is undefined, but if the residual is INT_MIN we have bigger problems */
 				abs_residual_partition_sums[partition] = abs_residual_partition_sum;
 			}
 		}
 		else { /* have to pessimistically use 64 bits for accumulator */
-			FLAC__uint64 abs_residual_partition_sum;
-
 			for(partition = residual_sample = 0; partition < partitions; partition++) {
+				FLAC__uint64 abs_residual_partition_sum64 = 0;
 				end += default_partition_samples;
-				abs_residual_partition_sum = 0;
 				for( ; residual_sample < end; residual_sample++)
-					abs_residual_partition_sum += abs(residual[residual_sample]); /* abs(INT_MIN) is undefined, but if the residual is INT_MIN we have bigger problems */
-				abs_residual_partition_sums[partition] = abs_residual_partition_sum;
+					abs_residual_partition_sum64 += abs(residual[residual_sample]); /* abs(INT_MIN) is undefined, but if the residual is INT_MIN we have bigger problems */
+				abs_residual_partition_sums[partition] = abs_residual_partition_sum64;
 			}
 		}
 	}
 
 	/* now merge partitions for lower orders */
 	{
-		unsigned from_partition = 0, to_partition = partitions;
+		uint32_t from_partition = 0, to_partition = partitions;
 		int partition_order;
 		for(partition_order = (int)max_partition_order - 1; partition_order >= (int)min_partition_order; partition_order--) {
-			unsigned i;
+			uint32_t i;
 			partitions >>= 1;
 			for(i = 0; i < partitions; i++) {
 				abs_residual_partition_sums[to_partition++] =
@@ -3991,24 +4301,24 @@
 
 void precompute_partition_info_escapes_(
 	const FLAC__int32 residual[],
-	unsigned raw_bits_per_partition[],
-	unsigned residual_samples,
-	unsigned predictor_order,
-	unsigned min_partition_order,
-	unsigned max_partition_order
+	uint32_t raw_bits_per_partition[],
+	uint32_t residual_samples,
+	uint32_t predictor_order,
+	uint32_t min_partition_order,
+	uint32_t max_partition_order
 )
 {
 	int partition_order;
-	unsigned from_partition, to_partition = 0;
-	const unsigned blocksize = residual_samples + predictor_order;
+	uint32_t from_partition, to_partition = 0;
+	const uint32_t blocksize = residual_samples + predictor_order;
 
 	/* first do max_partition_order */
 	for(partition_order = (int)max_partition_order; partition_order >= 0; partition_order--) {
 		FLAC__int32 r;
 		FLAC__uint32 rmax;
-		unsigned partition, partition_sample, partition_samples, residual_sample;
-		const unsigned partitions = 1u << partition_order;
-		const unsigned default_partition_samples = blocksize >> partition_order;
+		uint32_t partition, partition_sample, partition_samples, residual_sample;
+		const uint32_t partitions = 1u << partition_order;
+		const uint32_t default_partition_samples = blocksize >> partition_order;
 
 		FLAC__ASSERT(default_partition_samples > predictor_order);
 
@@ -4034,9 +4344,9 @@
 
 	/* now merge partitions for lower orders */
 	for(from_partition = 0, --partition_order; partition_order >= (int)min_partition_order; partition_order--) {
-		unsigned m;
-		unsigned i;
-		const unsigned partitions = 1u << partition_order;
+		uint32_t m;
+		uint32_t i;
+		const uint32_t partitions = 1u << partition_order;
 		for(i = 0; i < partitions; i++) {
 			m = raw_bits_per_partition[from_partition];
 			from_partition++;
@@ -4048,36 +4358,37 @@
 }
 
 #ifdef EXACT_RICE_BITS_CALCULATION
-static inline unsigned count_rice_bits_in_partition_(
-	const unsigned rice_parameter,
-	const unsigned partition_samples,
+static inline uint32_t count_rice_bits_in_partition_(
+	const uint32_t rice_parameter,
+	const uint32_t partition_samples,
 	const FLAC__int32 *residual
 )
 {
-	unsigned i, partition_bits =
+	uint32_t i;
+	uint64_t partition_bits =
 		FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_PARAMETER_LEN + /* actually could end up being FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_PARAMETER_LEN but err on side of 16bps */
 		(1+rice_parameter) * partition_samples /* 1 for unary stop bit + rice_parameter for the binary portion */
 	;
 	for(i = 0; i < partition_samples; i++)
 		partition_bits += ( (FLAC__uint32)((residual[i]<<1)^(residual[i]>>31)) >> rice_parameter );
-	return partition_bits;
+	return (uint32_t)(flac_min(partition_bits,UINT32_MAX)); // To make sure the return value doesn't overflow
 }
 #else
-static inline unsigned count_rice_bits_in_partition_(
-	const unsigned rice_parameter,
-	const unsigned partition_samples,
+static inline uint32_t count_rice_bits_in_partition_(
+	const uint32_t rice_parameter,
+	const uint32_t partition_samples,
 	const FLAC__uint64 abs_residual_partition_sum
 )
 {
-	return
+	return (uint32_t)(flac_min( // To make sure the return value doesn't overflow
 		FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_PARAMETER_LEN + /* actually could end up being FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_PARAMETER_LEN but err on side of 16bps */
 		(1+rice_parameter) * partition_samples + /* 1 for unary stop bit + rice_parameter for the binary portion */
 		(
 			rice_parameter?
-				(unsigned)(abs_residual_partition_sum >> (rice_parameter-1)) /* rice_parameter-1 because the real coder sign-folds instead of using a sign bit */
-				: (unsigned)(abs_residual_partition_sum << 1) /* can't shift by negative number, so reverse */
+				(abs_residual_partition_sum >> (rice_parameter-1)) /* rice_parameter-1 because the real coder sign-folds instead of using a sign bit */
+				: (abs_residual_partition_sum << 1) /* can't shift by negative number, so reverse */
 		)
-		- (partition_samples >> 1)
+		- (partition_samples >> 1),UINT32_MAX));
 		/* -(partition_samples>>1) to subtract out extra contributions to the abs_residual_partition_sum.
 		 * The actual number of bits used is closer to the sum(for all i in the partition) of  abs(residual[i])>>(rice_parameter-1)
 		 * By using the abs_residual_partition sum, we also add in bits in the LSBs that would normally be shifted out.
@@ -4093,62 +4404,113 @@
 	const FLAC__int32 residual[],
 #endif
 	const FLAC__uint64 abs_residual_partition_sums[],
-	const unsigned raw_bits_per_partition[],
-	const unsigned residual_samples,
-	const unsigned predictor_order,
-	const unsigned suggested_rice_parameter,
-	const unsigned rice_parameter_limit,
-	const unsigned rice_parameter_search_dist,
-	const unsigned partition_order,
+	const uint32_t raw_bits_per_partition[],
+	const uint32_t residual_samples,
+	const uint32_t predictor_order,
+	const uint32_t rice_parameter_limit,
+	const uint32_t rice_parameter_search_dist,
+	const uint32_t partition_order,
 	const FLAC__bool search_for_escapes,
 	FLAC__EntropyCodingMethod_PartitionedRiceContents *partitioned_rice_contents,
-	unsigned *bits
+	uint32_t *bits
 )
 {
-	unsigned rice_parameter, partition_bits;
-	unsigned best_partition_bits, best_rice_parameter = 0;
-	unsigned bits_ = FLAC__ENTROPY_CODING_METHOD_TYPE_LEN + FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ORDER_LEN;
-	unsigned *parameters, *raw_bits;
+	uint32_t rice_parameter, partition_bits;
+	uint32_t best_partition_bits, best_rice_parameter = 0;
+	uint32_t bits_ = FLAC__ENTROPY_CODING_METHOD_TYPE_LEN + FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ORDER_LEN;
+	uint32_t *parameters, *raw_bits;
+	uint32_t partition, residual_sample;
+	uint32_t partition_samples;
+	const uint32_t partitions = 1u << partition_order;
+	FLAC__uint64 mean, k;
 #ifdef ENABLE_RICE_PARAMETER_SEARCH
-	unsigned min_rice_parameter, max_rice_parameter;
+	uint32_t min_rice_parameter, max_rice_parameter;
 #else
 	(void)rice_parameter_search_dist;
 #endif
 
-	FLAC__ASSERT(suggested_rice_parameter < FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_ESCAPE_PARAMETER);
 	FLAC__ASSERT(rice_parameter_limit <= FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_ESCAPE_PARAMETER);
 
-	FLAC__format_entropy_coding_method_partitioned_rice_contents_ensure_size(partitioned_rice_contents, flac_max(6u, partition_order));
 	parameters = partitioned_rice_contents->parameters;
 	raw_bits = partitioned_rice_contents->raw_bits;
 
-	if(partition_order == 0) {
-		best_partition_bits = (unsigned)(-1);
+	for(partition = residual_sample = 0; partition < partitions; partition++) {
+		partition_samples = (residual_samples+predictor_order) >> partition_order;
+		if(partition == 0) {
+			if(partition_samples <= predictor_order)
+				return false;
+			else
+				partition_samples -= predictor_order;
+		}
+		mean = abs_residual_partition_sums[partition];
+		/* we are basically calculating the size in bits of the
+		 * average residual magnitude in the partition:
+		 *   rice_parameter = floor(log2(mean/partition_samples))
+		 * 'mean' is not a good name for the variable, it is
+		 * actually the sum of magnitudes of all residual values
+		 * in the partition, so the actual mean is
+		 * mean/partition_samples
+		 */
+#if 0 /* old simple code */
+		for(rice_parameter = 0, k = partition_samples; k < mean; rice_parameter++, k <<= 1)
+			;
+#else
+#if defined FLAC__CPU_X86_64 /* and other 64-bit arch, too */
+		if(mean <= 0x80000000/512) { /* 512: more or less optimal for both 16- and 24-bit input */
+#else
+		if(mean <= 0x80000000/8) { /* 32-bit arch: use 32-bit math if possible */
+#endif
+			FLAC__uint32 k2, mean2 = (FLAC__uint32) mean;
+			rice_parameter = 0; k2 = partition_samples;
+			while(k2*8 < mean2) { /* requires: mean <= (2^31)/8 */
+				rice_parameter += 4; k2 <<= 4; /* tuned for 16-bit input */
+			}
+			while(k2 < mean2) { /* requires: mean <= 2^31 */
+				rice_parameter++; k2 <<= 1;
+			}
+		}
+		else {
+			rice_parameter = 0; k = partition_samples;
+			if(mean <= FLAC__U64L(0x8000000000000000)/128) /* usually mean is _much_ smaller than this value */
+				while(k*128 < mean) { /* requires: mean <= (2^63)/128 */
+					rice_parameter += 8; k <<= 8; /* tuned for 24-bit input */
+				}
+			while(k < mean) { /* requires: mean <= 2^63 */
+				rice_parameter++; k <<= 1;
+			}
+		}
+#endif
+		if(rice_parameter >= rice_parameter_limit) {
+#ifndef NDEBUG
+			fprintf(stderr, "clipping rice_parameter (%u -> %u) @6\n", rice_parameter, rice_parameter_limit - 1);
+#endif
+			rice_parameter = rice_parameter_limit - 1;
+		}
+
+		best_partition_bits = UINT32_MAX;
 #ifdef ENABLE_RICE_PARAMETER_SEARCH
 		if(rice_parameter_search_dist) {
-			if(suggested_rice_parameter < rice_parameter_search_dist)
+			if(rice_parameter < rice_parameter_search_dist)
 				min_rice_parameter = 0;
 			else
-				min_rice_parameter = suggested_rice_parameter - rice_parameter_search_dist;
-			max_rice_parameter = suggested_rice_parameter + rice_parameter_search_dist;
+				min_rice_parameter = rice_parameter - rice_parameter_search_dist;
+			max_rice_parameter = rice_parameter + rice_parameter_search_dist;
 			if(max_rice_parameter >= rice_parameter_limit) {
-#ifdef DEBUG_VERBOSE
-				fprintf(stderr, "clipping rice_parameter (%u -> %u) @5\n", max_rice_parameter, rice_parameter_limit - 1);
+#ifndef NDEBUG
+				fprintf(stderr, "clipping rice_parameter (%u -> %u) @7\n", max_rice_parameter, rice_parameter_limit - 1);
 #endif
 				max_rice_parameter = rice_parameter_limit - 1;
 			}
 		}
 		else
-			min_rice_parameter = max_rice_parameter = suggested_rice_parameter;
+			min_rice_parameter = max_rice_parameter = rice_parameter;
 
 		for(rice_parameter = min_rice_parameter; rice_parameter <= max_rice_parameter; rice_parameter++) {
-#else
-			rice_parameter = suggested_rice_parameter;
 #endif
 #ifdef EXACT_RICE_BITS_CALCULATION
-			partition_bits = count_rice_bits_in_partition_(rice_parameter, residual_samples, residual);
+			partition_bits = count_rice_bits_in_partition_(rice_parameter, partition_samples, residual+residual_sample);
 #else
-			partition_bits = count_rice_bits_in_partition_(rice_parameter, residual_samples, abs_residual_partition_sums[0]);
+			partition_bits = count_rice_bits_in_partition_(rice_parameter, partition_samples, abs_residual_partition_sums[partition]);
 #endif
 			if(partition_bits < best_partition_bits) {
 				best_rice_parameter = rice_parameter;
@@ -4158,131 +4520,30 @@
 		}
 #endif
 		if(search_for_escapes) {
-			partition_bits = FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_PARAMETER_LEN + FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_RAW_LEN + raw_bits_per_partition[0] * residual_samples;
-			if(partition_bits <= best_partition_bits) {
-				raw_bits[0] = raw_bits_per_partition[0];
+			partition_bits = FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_PARAMETER_LEN + FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_RAW_LEN + raw_bits_per_partition[partition] * partition_samples;
+			if(partition_bits <= best_partition_bits && raw_bits_per_partition[partition] < 32) {
+				raw_bits[partition] = raw_bits_per_partition[partition];
 				best_rice_parameter = 0; /* will be converted to appropriate escape parameter later */
 				best_partition_bits = partition_bits;
 			}
 			else
-				raw_bits[0] = 0;
+				raw_bits[partition] = 0;
 		}
-		parameters[0] = best_rice_parameter;
-		bits_ += best_partition_bits;
-	}
-	else {
-		unsigned partition, residual_sample;
-		unsigned partition_samples;
-		FLAC__uint64 mean, k;
-		const unsigned partitions = 1u << partition_order;
-		for(partition = residual_sample = 0; partition < partitions; partition++) {
-			partition_samples = (residual_samples+predictor_order) >> partition_order;
-			if(partition == 0) {
-				if(partition_samples <= predictor_order)
-					return false;
-				else
-					partition_samples -= predictor_order;
-			}
-			mean = abs_residual_partition_sums[partition];
-			/* we are basically calculating the size in bits of the
-			 * average residual magnitude in the partition:
-			 *   rice_parameter = floor(log2(mean/partition_samples))
-			 * 'mean' is not a good name for the variable, it is
-			 * actually the sum of magnitudes of all residual values
-			 * in the partition, so the actual mean is
-			 * mean/partition_samples
-			 */
-#if 0 /* old simple code */
-			for(rice_parameter = 0, k = partition_samples; k < mean; rice_parameter++, k <<= 1)
-				;
-#else
-#if defined FLAC__CPU_X86_64 /* and other 64-bit arch, too */
-			if(mean <= 0x80000000/512) { /* 512: more or less optimal for both 16- and 24-bit input */
-#else
-			if(mean <= 0x80000000/8) { /* 32-bit arch: use 32-bit math if possible */
-#endif
-				FLAC__uint32 k2, mean2 = (FLAC__uint32) mean;
-				rice_parameter = 0; k2 = partition_samples;
-				while(k2*8 < mean2) { /* requires: mean <= (2^31)/8 */
-					rice_parameter += 4; k2 <<= 4; /* tuned for 16-bit input */
-				}
-				while(k2 < mean2) { /* requires: mean <= 2^31 */
-					rice_parameter++; k2 <<= 1;
-				}
-			}
-			else {
-				rice_parameter = 0; k = partition_samples;
-				if(mean <= FLAC__U64L(0x8000000000000000)/128) /* usually mean is _much_ smaller than this value */
-					while(k*128 < mean) { /* requires: mean <= (2^63)/128 */
-						rice_parameter += 8; k <<= 8; /* tuned for 24-bit input */
-					}
-				while(k < mean) { /* requires: mean <= 2^63 */
-					rice_parameter++; k <<= 1;
-				}
-			}
-#endif
-			if(rice_parameter >= rice_parameter_limit) {
-#ifdef DEBUG_VERBOSE
-				fprintf(stderr, "clipping rice_parameter (%u -> %u) @6\n", rice_parameter, rice_parameter_limit - 1);
-#endif
-				rice_parameter = rice_parameter_limit - 1;
-			}
-
-			best_partition_bits = (unsigned)(-1);
-#ifdef ENABLE_RICE_PARAMETER_SEARCH
-			if(rice_parameter_search_dist) {
-				if(rice_parameter < rice_parameter_search_dist)
-					min_rice_parameter = 0;
-				else
-					min_rice_parameter = rice_parameter - rice_parameter_search_dist;
-				max_rice_parameter = rice_parameter + rice_parameter_search_dist;
-				if(max_rice_parameter >= rice_parameter_limit) {
-#ifdef DEBUG_VERBOSE
-					fprintf(stderr, "clipping rice_parameter (%u -> %u) @7\n", max_rice_parameter, rice_parameter_limit - 1);
-#endif
-					max_rice_parameter = rice_parameter_limit - 1;
-				}
-			}
-			else
-				min_rice_parameter = max_rice_parameter = rice_parameter;
-
-			for(rice_parameter = min_rice_parameter; rice_parameter <= max_rice_parameter; rice_parameter++) {
-#endif
-#ifdef EXACT_RICE_BITS_CALCULATION
-				partition_bits = count_rice_bits_in_partition_(rice_parameter, partition_samples, residual+residual_sample);
-#else
-				partition_bits = count_rice_bits_in_partition_(rice_parameter, partition_samples, abs_residual_partition_sums[partition]);
-#endif
-				if(partition_bits < best_partition_bits) {
-					best_rice_parameter = rice_parameter;
-					best_partition_bits = partition_bits;
-				}
-#ifdef ENABLE_RICE_PARAMETER_SEARCH
-			}
-#endif
-			if(search_for_escapes) {
-				partition_bits = FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_PARAMETER_LEN + FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_RAW_LEN + raw_bits_per_partition[partition] * partition_samples;
-				if(partition_bits <= best_partition_bits) {
-					raw_bits[partition] = raw_bits_per_partition[partition];
-					best_rice_parameter = 0; /* will be converted to appropriate escape parameter later */
-					best_partition_bits = partition_bits;
-				}
-				else
-					raw_bits[partition] = 0;
-			}
-			parameters[partition] = best_rice_parameter;
+		parameters[partition] = best_rice_parameter;
+		if(best_partition_bits < UINT32_MAX - bits_) // To make sure bits_ doesn't overflow
 			bits_ += best_partition_bits;
-			residual_sample += partition_samples;
-		}
+		else
+			bits_ = UINT32_MAX;
+		residual_sample += partition_samples;
 	}
 
 	*bits = bits_;
 	return true;
 }
 
-unsigned get_wasted_bits_(FLAC__int32 signal[], unsigned samples)
+uint32_t get_wasted_bits_(FLAC__int32 signal[], uint32_t samples)
 {
-	unsigned i, shift;
+	uint32_t i, shift;
 	FLAC__int32 x = 0;
 
 	for(i = 0; i < samples && !(x&1); i++)
@@ -4304,9 +4565,34 @@
 	return shift;
 }
 
-void append_to_verify_fifo_(verify_input_fifo *fifo, const FLAC__int32 * const input[], unsigned input_offset, unsigned channels, unsigned wide_samples)
+uint32_t get_wasted_bits_wide_(FLAC__int64 signal_wide[], FLAC__int32 signal[], uint32_t samples)
 {
-	unsigned channel;
+	uint32_t i, shift;
+	FLAC__int64 x = 0;
+
+	for(i = 0; i < samples && !(x&1); i++)
+		x |= signal_wide[i];
+
+	if(x == 0) {
+		shift = 1;
+	}
+	else {
+		for(shift = 0; !(x&1); shift++)
+			x >>= 1;
+	}
+
+	if(shift > 0) {
+		for(i = 0; i < samples; i++)
+			 signal[i] = (FLAC__int32)(signal_wide[i] >> shift);
+	}
+
+	return shift;
+}
+
+
+void append_to_verify_fifo_(verify_input_fifo *fifo, const FLAC__int32 * const input[], uint32_t input_offset, uint32_t channels, uint32_t wide_samples)
+{
+	uint32_t channel;
 
 	for(channel = 0; channel < channels; channel++)
 		memcpy(&fifo->data[channel][fifo->tail], &input[channel][input_offset], sizeof(FLAC__int32) * wide_samples);
@@ -4316,11 +4602,11 @@
 	FLAC__ASSERT(fifo->tail <= fifo->size);
 }
 
-void append_to_verify_fifo_interleaved_(verify_input_fifo *fifo, const FLAC__int32 input[], unsigned input_offset, unsigned channels, unsigned wide_samples)
+void append_to_verify_fifo_interleaved_(verify_input_fifo *fifo, const FLAC__int32 input[], uint32_t input_offset, uint32_t channels, uint32_t wide_samples)
 {
-	unsigned channel;
-	unsigned sample, wide_sample;
-	unsigned tail = fifo->tail;
+	uint32_t channel;
+	uint32_t sample, wide_sample;
+	uint32_t tail = fifo->tail;
 
 	sample = input_offset * channels;
 	for(wide_sample = 0; wide_sample < wide_samples; wide_sample++) {
@@ -4367,16 +4653,21 @@
 FLAC__StreamDecoderWriteStatus verify_write_callback_(const FLAC__StreamDecoder *decoder, const FLAC__Frame *frame, const FLAC__int32 * const buffer[], void *client_data)
 {
 	FLAC__StreamEncoder *encoder = (FLAC__StreamEncoder *)client_data;
-	unsigned channel;
-	const unsigned channels = frame->header.channels;
-	const unsigned blocksize = frame->header.blocksize;
-	const unsigned bytes_per_block = sizeof(FLAC__int32) * blocksize;
+	uint32_t channel;
+	const uint32_t channels = frame->header.channels;
+	const uint32_t blocksize = frame->header.blocksize;
+	const uint32_t bytes_per_block = sizeof(FLAC__int32) * blocksize;
 
 	(void)decoder;
 
+	if(encoder->protected_->state == FLAC__STREAM_ENCODER_VERIFY_DECODER_ERROR) {
+		/* This state is set when verify_error_callback_ has been called */
+		return FLAC__STREAM_DECODER_WRITE_STATUS_ABORT;
+	}
+
 	for(channel = 0; channel < channels; channel++) {
 		if(0 != memcmp(buffer[channel], encoder->private_->verify.input_fifo.data[channel], bytes_per_block)) {
-			unsigned i, sample = 0;
+			uint32_t i, sample = 0;
 			FLAC__int32 expect = 0, got = 0;
 
 			for(i = 0; i < blocksize; i++) {
@@ -4390,7 +4681,7 @@
 			FLAC__ASSERT(i < blocksize);
 			FLAC__ASSERT(frame->header.number_type == FLAC__FRAME_NUMBER_TYPE_SAMPLE_NUMBER);
 			encoder->private_->verify.error_stats.absolute_sample = frame->header.number.sample_number + sample;
-			encoder->private_->verify.error_stats.frame_number = (unsigned)(frame->header.number.sample_number / blocksize);
+			encoder->private_->verify.error_stats.frame_number = (uint32_t)(frame->header.number.sample_number / blocksize);
 			encoder->private_->verify.error_stats.channel = channel;
 			encoder->private_->verify.error_stats.sample = sample;
 			encoder->private_->verify.error_stats.expected = expect;
@@ -4472,7 +4763,7 @@
 #define local__fwrite fwrite
 #endif
 
-FLAC__StreamEncoderWriteStatus file_write_callback_(const FLAC__StreamEncoder *encoder, const FLAC__byte buffer[], size_t bytes, unsigned samples, unsigned current_frame, void *client_data)
+FLAC__StreamEncoderWriteStatus file_write_callback_(const FLAC__StreamEncoder *encoder, const FLAC__byte buffer[], size_t bytes, uint32_t samples, uint32_t current_frame, void *client_data)
 {
 	(void)client_data, (void)current_frame;
 
@@ -4514,9 +4805,6 @@
 	 */
 #if defined _MSC_VER || defined __MINGW32__
 	_setmode(_fileno(stdout), _O_BINARY);
-#elif defined __CYGWIN__
-	/* almost certainly not needed for any modern Cygwin, but let's be safe... */
-	setmode(_fileno(stdout), _O_BINARY);
 #elif defined __EMX__
 	setmode(fileno(stdout), O_BINARY);
 #endif

diff --git a/src/libFLAC/stream_encoder_framing.c b/src/libFLAC/stream_encoder_framing.c
index 959eca0..a3bdd71 100644
--- a/src/libFLAC/stream_encoder_framing.c
+++ b/src/libFLAC/stream_encoder_framing.c

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -39,14 +39,15 @@
 #include "private/stream_encoder_framing.h"
 #include "private/crc.h"
 #include "FLAC/assert.h"
+#include "share/compat.h"
 
 static FLAC__bool add_entropy_coding_method_(FLAC__BitWriter *bw, const FLAC__EntropyCodingMethod *method);
-static FLAC__bool add_residual_partitioned_rice_(FLAC__BitWriter *bw, const FLAC__int32 residual[], const unsigned residual_samples, const unsigned predictor_order, const unsigned rice_parameters[], const unsigned raw_bits[], const unsigned partition_order, const FLAC__bool is_extended);
+static FLAC__bool add_residual_partitioned_rice_(FLAC__BitWriter *bw, const FLAC__int32 residual[], const uint32_t residual_samples, const uint32_t predictor_order, const uint32_t rice_parameters[], const uint32_t raw_bits[], const uint32_t partition_order, const FLAC__bool is_extended);
 
 FLAC__bool FLAC__add_metadata_block(const FLAC__StreamMetadata *metadata, FLAC__BitWriter *bw)
 {
-	unsigned i, j;
-	const unsigned vendor_string_length = (unsigned)strlen(FLAC__VENDOR_STRING);
+	uint32_t i, j;
+	const uint32_t vendor_string_length = (uint32_t)strlen(FLAC__VENDOR_STRING);
 
 	if(!FLAC__bitwriter_write_raw_uint32(bw, metadata->is_last, FLAC__STREAM_METADATA_IS_LAST_LEN))
 		return false;
@@ -64,6 +65,9 @@
 		i += vendor_string_length;
 	}
 	FLAC__ASSERT(i < (1u << FLAC__STREAM_METADATA_LENGTH_LEN));
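+	/* double protection: repeat the asserted bound as a runtime check and fail rather than write a length that does not fit the field */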
+	if(i >= (1u << FLAC__STREAM_METADATA_LENGTH_LEN))
+		return false;
 	if(!FLAC__bitwriter_write_raw_uint32(bw, i, FLAC__STREAM_METADATA_LENGTH_LEN))
 		return false;
 
@@ -92,8 +96,13 @@
 			FLAC__ASSERT(metadata->data.stream_info.bits_per_sample <= (1u << FLAC__STREAM_METADATA_STREAMINFO_BITS_PER_SAMPLE_LEN));
 			if(!FLAC__bitwriter_write_raw_uint32(bw, metadata->data.stream_info.bits_per_sample-1, FLAC__STREAM_METADATA_STREAMINFO_BITS_PER_SAMPLE_LEN))
 				return false;
-			if(!FLAC__bitwriter_write_raw_uint64(bw, metadata->data.stream_info.total_samples, FLAC__STREAM_METADATA_STREAMINFO_TOTAL_SAMPLES_LEN))
-				return false;
+			if(metadata->data.stream_info.total_samples >= (FLAC__U64L(1) << FLAC__STREAM_METADATA_STREAMINFO_TOTAL_SAMPLES_LEN)){
+				if(!FLAC__bitwriter_write_raw_uint64(bw, 0, FLAC__STREAM_METADATA_STREAMINFO_TOTAL_SAMPLES_LEN))
+					return false;
+			}else{
+				if(!FLAC__bitwriter_write_raw_uint64(bw, metadata->data.stream_info.total_samples, FLAC__STREAM_METADATA_STREAMINFO_TOTAL_SAMPLES_LEN))
+					return false;
+			}
 			if(!FLAC__bitwriter_write_byte_block(bw, metadata->data.stream_info.md5sum, 16))
 				return false;
 			break;
@@ -214,7 +223,7 @@
 
 FLAC__bool FLAC__frame_add_header(const FLAC__FrameHeader *header, FLAC__BitWriter *bw)
 {
-	unsigned u, blocksize_hint, sample_rate_hint;
+	uint32_t u, blocksize_hint, sample_rate_hint;
 	FLAC__byte crc;
 
 	FLAC__ASSERT(FLAC__bitwriter_is_byte_aligned(bw));
@@ -273,7 +282,7 @@
 		default:
 			if(header->sample_rate <= 255000 && header->sample_rate % 1000 == 0)
 				sample_rate_hint = u = 12;
-			else if(header->sample_rate % 10 == 0)
+			else if(header->sample_rate <= 655350 && header->sample_rate % 10 == 0)
 				sample_rate_hint = u = 14;
 			else if(header->sample_rate <= 0xffff)
 				sample_rate_hint = u = 13;
@@ -314,6 +323,7 @@
 		case 16: u = 4; break;
 		case 20: u = 5; break;
 		case 24: u = 6; break;
+		case 32: u = 7; break;
 		default: u = 0; break;
 	}
 	if(!FLAC__bitwriter_write_raw_uint32(bw, u, FLAC__FRAME_HEADER_BITS_PER_SAMPLE_LEN))
@@ -359,22 +369,22 @@
 	return true;
 }
 
-FLAC__bool FLAC__subframe_add_constant(const FLAC__Subframe_Constant *subframe, unsigned subframe_bps, unsigned wasted_bits, FLAC__BitWriter *bw)
+FLAC__bool FLAC__subframe_add_constant(const FLAC__Subframe_Constant *subframe, uint32_t subframe_bps, uint32_t wasted_bits, FLAC__BitWriter *bw)
 {
 	FLAC__bool ok;
 
 	ok =
 		FLAC__bitwriter_write_raw_uint32(bw, FLAC__SUBFRAME_TYPE_CONSTANT_BYTE_ALIGNED_MASK | (wasted_bits? 1:0), FLAC__SUBFRAME_ZERO_PAD_LEN + FLAC__SUBFRAME_TYPE_LEN + FLAC__SUBFRAME_WASTED_BITS_FLAG_LEN) &&
 		(wasted_bits? FLAC__bitwriter_write_unary_unsigned(bw, wasted_bits-1) : true) &&
-		FLAC__bitwriter_write_raw_int32(bw, subframe->value, subframe_bps)
+		FLAC__bitwriter_write_raw_int64(bw, subframe->value, subframe_bps)
 	;
 
 	return ok;
 }
 
-FLAC__bool FLAC__subframe_add_fixed(const FLAC__Subframe_Fixed *subframe, unsigned residual_samples, unsigned subframe_bps, unsigned wasted_bits, FLAC__BitWriter *bw)
+FLAC__bool FLAC__subframe_add_fixed(const FLAC__Subframe_Fixed *subframe, uint32_t residual_samples, uint32_t subframe_bps, uint32_t wasted_bits, FLAC__BitWriter *bw)
 {
-	unsigned i;
+	uint32_t i;
 
 	if(!FLAC__bitwriter_write_raw_uint32(bw, FLAC__SUBFRAME_TYPE_FIXED_BYTE_ALIGNED_MASK | (subframe->order<<1) | (wasted_bits? 1:0), FLAC__SUBFRAME_ZERO_PAD_LEN + FLAC__SUBFRAME_TYPE_LEN + FLAC__SUBFRAME_WASTED_BITS_FLAG_LEN))
 		return false;
@@ -383,7 +393,7 @@
 			return false;
 
 	for(i = 0; i < subframe->order; i++)
-		if(!FLAC__bitwriter_write_raw_int32(bw, subframe->warmup[i], subframe_bps))
+		if(!FLAC__bitwriter_write_raw_int64(bw, subframe->warmup[i], subframe_bps))
 			return false;
 
 	if(!add_entropy_coding_method_(bw, &subframe->entropy_coding_method))
@@ -410,9 +420,9 @@
 	return true;
 }
 
-FLAC__bool FLAC__subframe_add_lpc(const FLAC__Subframe_LPC *subframe, unsigned residual_samples, unsigned subframe_bps, unsigned wasted_bits, FLAC__BitWriter *bw)
+FLAC__bool FLAC__subframe_add_lpc(const FLAC__Subframe_LPC *subframe, uint32_t residual_samples, uint32_t subframe_bps, uint32_t wasted_bits, FLAC__BitWriter *bw)
 {
-	unsigned i;
+	uint32_t i;
 
 	if(!FLAC__bitwriter_write_raw_uint32(bw, FLAC__SUBFRAME_TYPE_LPC_BYTE_ALIGNED_MASK | ((subframe->order-1)<<1) | (wasted_bits? 1:0), FLAC__SUBFRAME_ZERO_PAD_LEN + FLAC__SUBFRAME_TYPE_LEN + FLAC__SUBFRAME_WASTED_BITS_FLAG_LEN))
 		return false;
@@ -421,7 +431,7 @@
 			return false;
 
 	for(i = 0; i < subframe->order; i++)
-		if(!FLAC__bitwriter_write_raw_int32(bw, subframe->warmup[i], subframe_bps))
+		if(!FLAC__bitwriter_write_raw_int64(bw, subframe->warmup[i], subframe_bps))
 			return false;
 
 	if(!FLAC__bitwriter_write_raw_uint32(bw, subframe->qlp_coeff_precision-1, FLAC__SUBFRAME_LPC_QLP_COEFF_PRECISION_LEN))
@@ -456,10 +466,9 @@
 	return true;
 }
 
-FLAC__bool FLAC__subframe_add_verbatim(const FLAC__Subframe_Verbatim *subframe, unsigned samples, unsigned subframe_bps, unsigned wasted_bits, FLAC__BitWriter *bw)
+FLAC__bool FLAC__subframe_add_verbatim(const FLAC__Subframe_Verbatim *subframe, uint32_t samples, uint32_t subframe_bps, uint32_t wasted_bits, FLAC__BitWriter *bw)
 {
-	unsigned i;
-	const FLAC__int32 *signal = subframe->data;
+	uint32_t i;
 
 	if(!FLAC__bitwriter_write_raw_uint32(bw, FLAC__SUBFRAME_TYPE_VERBATIM_BYTE_ALIGNED_MASK | (wasted_bits? 1:0), FLAC__SUBFRAME_ZERO_PAD_LEN + FLAC__SUBFRAME_TYPE_LEN + FLAC__SUBFRAME_WASTED_BITS_FLAG_LEN))
 		return false;
@@ -467,9 +476,24 @@
 		if(!FLAC__bitwriter_write_unary_unsigned(bw, wasted_bits-1))
 			return false;
 
-	for(i = 0; i < samples; i++)
-		if(!FLAC__bitwriter_write_raw_int32(bw, signal[i], subframe_bps))
-			return false;
+	if(subframe->data_type == FLAC__VERBATIM_SUBFRAME_DATA_TYPE_INT32) {
+		const FLAC__int32 *signal = subframe->data.int32;
+
+		FLAC__ASSERT(subframe_bps < 33);
+
+		for(i = 0; i < samples; i++)
+			if(!FLAC__bitwriter_write_raw_int32(bw, signal[i], subframe_bps))
+				return false;
+	}
+	else {
+		const FLAC__int64 *signal = subframe->data.int64;
+
+		FLAC__ASSERT(subframe_bps == 33);
+
+		for(i = 0; i < samples; i++)
+			if(!FLAC__bitwriter_write_raw_int64(bw, (FLAC__int64)signal[i], subframe_bps))
+				return false;
+	}
 
 	return true;
 }
@@ -490,13 +514,13 @@
 	return true;
 }
 
-FLAC__bool add_residual_partitioned_rice_(FLAC__BitWriter *bw, const FLAC__int32 residual[], const unsigned residual_samples, const unsigned predictor_order, const unsigned rice_parameters[], const unsigned raw_bits[], const unsigned partition_order, const FLAC__bool is_extended)
+FLAC__bool add_residual_partitioned_rice_(FLAC__BitWriter *bw, const FLAC__int32 residual[], const uint32_t residual_samples, const uint32_t predictor_order, const uint32_t rice_parameters[], const uint32_t raw_bits[], const uint32_t partition_order, const FLAC__bool is_extended)
 {
-	const unsigned plen = is_extended? FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_PARAMETER_LEN : FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_PARAMETER_LEN;
-	const unsigned pesc = is_extended? FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_ESCAPE_PARAMETER : FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ESCAPE_PARAMETER;
+	const uint32_t plen = is_extended? FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_PARAMETER_LEN : FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_PARAMETER_LEN;
+	const uint32_t pesc = is_extended? FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE2_ESCAPE_PARAMETER : FLAC__ENTROPY_CODING_METHOD_PARTITIONED_RICE_ESCAPE_PARAMETER;
 
 	if(partition_order == 0) {
-		unsigned i;
+		uint32_t i;
 
 		if(raw_bits[0] == 0) {
 			if(!FLAC__bitwriter_write_raw_uint32(bw, rice_parameters[0], plen))
@@ -518,9 +542,9 @@
 		return true;
 	}
 	else {
-		unsigned i, j, k = 0, k_last = 0;
-		unsigned partition_samples;
-		const unsigned default_partition_samples = (residual_samples+predictor_order) >> partition_order;
+		uint32_t i, j, k = 0, k_last = 0;
+		uint32_t partition_samples;
+		const uint32_t default_partition_samples = (residual_samples+predictor_order) >> partition_order;
 		for(i = 0; i < (1u<<partition_order); i++) {
 			partition_samples = default_partition_samples;
 			if(i == 0)

diff --git a/src/libFLAC/stream_encoder_intrin_avx2.c b/src/libFLAC/stream_encoder_intrin_avx2.c
index 3aa3197..665f803 100644
--- a/src/libFLAC/stream_encoder_intrin_avx2.c
+++ b/src/libFLAC/stream_encoder_intrin_avx2.c

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -34,8 +34,10 @@
 #  include <config.h>
 #endif
 
+#include "private/cpu.h"
+
 #ifndef FLAC__NO_ASM
-#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
+#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN
 #include "private/stream_encoder.h"
 #include "private/bitmath.h"
 #ifdef FLAC__AVX2_SUPPORTED
@@ -46,85 +48,87 @@
 
 FLAC__SSE_TARGET("avx2")
 void FLAC__precompute_partition_info_sums_intrin_avx2(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
-		unsigned residual_samples, unsigned predictor_order, unsigned min_partition_order, unsigned max_partition_order, unsigned bps)
+		uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t bps)
 {
-	const unsigned default_partition_samples = (residual_samples + predictor_order) >> max_partition_order;
-	unsigned partitions = 1u << max_partition_order;
+	const uint32_t default_partition_samples = (residual_samples + predictor_order) >> max_partition_order;
+	uint32_t partitions = 1u << max_partition_order;
 
 	FLAC__ASSERT(default_partition_samples > predictor_order);
 
 	/* first do max_partition_order */
 	{
-		unsigned partition, residual_sample, end = (unsigned)(-(int)predictor_order);
-		__m256i res256, sum256;
-		__m128i res128, sum128;
+		const uint32_t threshold = 32 - FLAC__bitmath_ilog2(default_partition_samples);
+		uint32_t partition, residual_sample, end = (uint32_t)(-(int32_t)predictor_order);
 
-		if(FLAC__bitmath_ilog2(default_partition_samples) + bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < 32) {
+		if(bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < threshold) {
 			for(partition = residual_sample = 0; partition < partitions; partition++) {
+				__m256i sum256 = _mm256_setzero_si256();
+				__m128i sum128;
 				end += default_partition_samples;
-				sum256 = _mm256_setzero_si256();
 
 				for( ; (int)residual_sample < (int)end-7; residual_sample+=8) {
-					res256 = _mm256_abs_epi32(_mm256_loadu_si256((const __m256i*)(residual+residual_sample)));
+					__m256i res256 = _mm256_abs_epi32(_mm256_loadu_si256((const __m256i*)(const void*)(residual+residual_sample)));
 					sum256 = _mm256_add_epi32(sum256, res256);
 				}
 
 				sum128 = _mm_add_epi32(_mm256_extracti128_si256(sum256, 1), _mm256_castsi256_si128(sum256));
 
 				for( ; (int)residual_sample < (int)end-3; residual_sample+=4) {
-					res128 = _mm_abs_epi32(_mm_loadu_si128((const __m128i*)(residual+residual_sample)));
+					__m128i res128 = _mm_abs_epi32(_mm_loadu_si128((const __m128i*)(const void*)(residual+residual_sample)));
 					sum128 = _mm_add_epi32(sum128, res128);
 				}
 
 				for( ; residual_sample < end; residual_sample++) {
-					res128 = _mm_cvtsi32_si128(residual[residual_sample]);
-					res128 = _mm_abs_epi32(res128);
+					__m128i res128 = _mm_abs_epi32(_mm_cvtsi32_si128(residual[residual_sample]));
 					sum128 = _mm_add_epi32(sum128, res128);
 				}
 
-				sum128 = _mm_hadd_epi32(sum128, sum128);
-				sum128 = _mm_hadd_epi32(sum128, sum128);
+				sum128 = _mm_add_epi32(sum128, _mm_shuffle_epi32(sum128, _MM_SHUFFLE(1,0,3,2)));
+				sum128 = _mm_add_epi32(sum128, _mm_shufflelo_epi16(sum128, _MM_SHUFFLE(1,0,3,2)));
 				abs_residual_partition_sums[partition] = (FLAC__uint32)_mm_cvtsi128_si32(sum128);
+/* workaround for MSVC bugs (at least versions 2015 and 2017 are affected) */
+#if (defined _MSC_VER) && (defined FLAC__CPU_X86_64)
+				abs_residual_partition_sums[partition] &= 0xFFFFFFFF; /**/
+#endif
 			}
 		}
 		else { /* have to pessimistically use 64 bits for accumulator */
 			for(partition = residual_sample = 0; partition < partitions; partition++) {
+				__m256i sum256 = _mm256_setzero_si256();
+				__m128i sum128;
 				end += default_partition_samples;
-				sum256 = _mm256_setzero_si256();
 
 				for( ; (int)residual_sample < (int)end-3; residual_sample+=4) {
-					res128 = _mm_abs_epi32(_mm_loadu_si128((const __m128i*)(residual+residual_sample)));
-					res256 = _mm256_cvtepu32_epi64(res128);
+					__m128i res128 = _mm_abs_epi32(_mm_loadu_si128((const __m128i*)(const void*)(residual+residual_sample)));
+					__m256i res256 = _mm256_cvtepu32_epi64(res128);
 					sum256 = _mm256_add_epi64(sum256, res256);
 				}
 
 				sum128 = _mm_add_epi64(_mm256_extracti128_si256(sum256, 1), _mm256_castsi256_si128(sum256));
 
 				for( ; (int)residual_sample < (int)end-1; residual_sample+=2) {
-					res128 = _mm_loadl_epi64((const __m128i*)(residual+residual_sample));
-					res128 = _mm_abs_epi32(res128);
+					__m128i res128 = _mm_abs_epi32(_mm_loadl_epi64((const __m128i*)(const void*)(residual+residual_sample)));
 					res128 = _mm_cvtepu32_epi64(res128);
 					sum128 = _mm_add_epi64(sum128, res128);
 				}
 
 				for( ; residual_sample < end; residual_sample++) {
-					res128 = _mm_cvtsi32_si128(residual[residual_sample]);
-					res128 = _mm_abs_epi32(res128);
+					__m128i res128 = _mm_abs_epi32(_mm_cvtsi32_si128(residual[residual_sample]));
 					sum128 = _mm_add_epi64(sum128, res128);
 				}
 
 				sum128 = _mm_add_epi64(sum128, _mm_srli_si128(sum128, 8));
-				_mm_storel_epi64((__m128i*)(abs_residual_partition_sums+partition), sum128);
+				_mm_storel_epi64((__m128i*)(void*)(abs_residual_partition_sums+partition), sum128);
 			}
 		}
 	}
 
 	/* now merge partitions for lower orders */
 	{
-		unsigned from_partition = 0, to_partition = partitions;
+		uint32_t from_partition = 0, to_partition = partitions;
 		int partition_order;
 		for(partition_order = (int)max_partition_order - 1; partition_order >= (int)min_partition_order; partition_order--) {
-			unsigned i;
+			uint32_t i;
 			partitions >>= 1;
 			for(i = 0; i < partitions; i++) {
 				abs_residual_partition_sums[to_partition++] =

diff --git a/src/libFLAC/stream_encoder_intrin_sse2.c b/src/libFLAC/stream_encoder_intrin_sse2.c
index ec5541c..fdab55b 100644
--- a/src/libFLAC/stream_encoder_intrin_sse2.c
+++ b/src/libFLAC/stream_encoder_intrin_sse2.c

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -34,8 +34,10 @@
 #  include <config.h>
 #endif
 
+#include "private/cpu.h"
+
 #ifndef FLAC__NO_ASM
-#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
+#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN
 #include "private/stream_encoder.h"
 #include "private/bitmath.h"
 #ifdef FLAC__SSE2_SUPPORTED
@@ -43,26 +45,37 @@
 #include <stdlib.h>    /* for abs() */
 #include <emmintrin.h> /* SSE2 */
 #include "FLAC/assert.h"
+#include "share/compat.h"
+
+FLAC__SSE_TARGET("sse2")
+static inline __m128i local_abs_epi32(__m128i val)
+{
+	__m128i mask = _mm_srai_epi32(val, 31);
+	val = _mm_xor_si128(val, mask);
+	val = _mm_sub_epi32(val, mask);
+	return val;
+}
+
 
 FLAC__SSE_TARGET("sse2")
 void FLAC__precompute_partition_info_sums_intrin_sse2(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
-		unsigned residual_samples, unsigned predictor_order, unsigned min_partition_order, unsigned max_partition_order, unsigned bps)
+		uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t bps)
 {
-	const unsigned default_partition_samples = (residual_samples + predictor_order) >> max_partition_order;
-	unsigned partitions = 1u << max_partition_order;
+	const uint32_t default_partition_samples = (residual_samples + predictor_order) >> max_partition_order;
+	uint32_t partitions = 1u << max_partition_order;
 
 	FLAC__ASSERT(default_partition_samples > predictor_order);
 
 	/* first do max_partition_order */
 	{
-		unsigned partition, residual_sample, end = (unsigned)(-(int)predictor_order);
-		unsigned e1, e3;
-		__m128i mm_res, mm_sum, mm_mask;
+		const uint32_t threshold = 32 - FLAC__bitmath_ilog2(default_partition_samples);
+		uint32_t partition, residual_sample, end = (uint32_t)(-(int32_t)predictor_order);
 
-		if(FLAC__bitmath_ilog2(default_partition_samples) + bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < 32) {
+		if(bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < threshold) {
 			for(partition = residual_sample = 0; partition < partitions; partition++) {
+				__m128i mm_sum = _mm_setzero_si128();
+				uint32_t e1, e3;
 				end += default_partition_samples;
-				mm_sum = _mm_setzero_si128();
 
 				e1 = (residual_sample + 3) & ~3; e3 = end & ~3;
 				if(e1 > end)
@@ -70,79 +83,66 @@
 
 				/* assumption: residual[] is properly aligned so (residual + e1) is properly aligned too and _mm_loadu_si128() is fast */
 				for( ; residual_sample < e1; residual_sample++) {
-					mm_res = _mm_cvtsi32_si128(residual[residual_sample]);
-					mm_mask = _mm_srai_epi32(mm_res, 31);
-					mm_res = _mm_xor_si128(mm_res, mm_mask);
-					mm_res = _mm_sub_epi32(mm_res, mm_mask); /* abs(INT_MIN) is undefined, but if the residual is INT_MIN we have bigger problems */
+					__m128i mm_res = local_abs_epi32(_mm_cvtsi32_si128(residual[residual_sample]));
 					mm_sum = _mm_add_epi32(mm_sum, mm_res);
 				}
 
 				for( ; residual_sample < e3; residual_sample+=4) {
-					mm_res = _mm_loadu_si128((const __m128i*)(residual+residual_sample));
-					mm_mask = _mm_srai_epi32(mm_res, 31);
-					mm_res = _mm_xor_si128(mm_res, mm_mask);
-					mm_res = _mm_sub_epi32(mm_res, mm_mask);
+					__m128i mm_res = local_abs_epi32(_mm_loadu_si128((const __m128i*)(const void*)(residual+residual_sample)));
 					mm_sum = _mm_add_epi32(mm_sum, mm_res);
 				}
 
 				for( ; residual_sample < end; residual_sample++) {
-					mm_res = _mm_cvtsi32_si128(residual[residual_sample]);
-					mm_mask = _mm_srai_epi32(mm_res, 31);
-					mm_res = _mm_xor_si128(mm_res, mm_mask);
-					mm_res = _mm_sub_epi32(mm_res, mm_mask);
+					__m128i mm_res = local_abs_epi32(_mm_cvtsi32_si128(residual[residual_sample]));
 					mm_sum = _mm_add_epi32(mm_sum, mm_res);
 				}
 
-				mm_sum = _mm_add_epi32(mm_sum, _mm_srli_si128(mm_sum, 8));
-				mm_sum = _mm_add_epi32(mm_sum, _mm_srli_si128(mm_sum, 4));
+				mm_sum = _mm_add_epi32(mm_sum, _mm_shuffle_epi32(mm_sum, _MM_SHUFFLE(1,0,3,2)));
+				mm_sum = _mm_add_epi32(mm_sum, _mm_shufflelo_epi16(mm_sum, _MM_SHUFFLE(1,0,3,2)));
 				abs_residual_partition_sums[partition] = (FLAC__uint32)_mm_cvtsi128_si32(mm_sum);
+/* workaround for MSVC bugs (at least versions 2015 and 2017 are affected) */
+#if (defined _MSC_VER) && (defined FLAC__CPU_X86_64)
+				abs_residual_partition_sums[partition] &= 0xFFFFFFFF;
+#endif
 			}
 		}
 		else { /* have to pessimistically use 64 bits for accumulator */
 			for(partition = residual_sample = 0; partition < partitions; partition++) {
+				__m128i mm_sum = _mm_setzero_si128();
+				uint32_t e1, e3;
 				end += default_partition_samples;
-				mm_sum = _mm_setzero_si128();
 
 				e1 = (residual_sample + 1) & ~1; e3 = end & ~1;
 				FLAC__ASSERT(e1 <= end);
 
 				for( ; residual_sample < e1; residual_sample++) {
-					mm_res = _mm_cvtsi32_si128(residual[residual_sample]); /*  0   0   0   r0 */
-					mm_mask = _mm_srai_epi32(mm_res, 31);
-					mm_res = _mm_xor_si128(mm_res, mm_mask);
-					mm_res = _mm_sub_epi32(mm_res, mm_mask); /*  0   0   0  |r0|  ==   00   |r0_64| */
+					__m128i mm_res = local_abs_epi32(_mm_cvtsi32_si128(residual[residual_sample])); /*  0   0   0  |r0|  ==   00   |r0_64| */
 					mm_sum = _mm_add_epi64(mm_sum, mm_res);
 				}
 
 				for( ; residual_sample < e3; residual_sample+=2) {
-					mm_res = _mm_loadl_epi64((const __m128i*)(residual+residual_sample)); /*  0   0   r1  r0 */
-					mm_mask = _mm_srai_epi32(mm_res, 31);
-					mm_res = _mm_xor_si128(mm_res, mm_mask);
-					mm_res = _mm_sub_epi32(mm_res, mm_mask); /*  0   0  |r1|   |r0| */
+					__m128i mm_res = local_abs_epi32(_mm_loadl_epi64((const __m128i*)(const void*)(residual+residual_sample))); /*  0   0  |r1|   |r0| */
 					mm_res = _mm_shuffle_epi32(mm_res, _MM_SHUFFLE(3,1,2,0)); /* 0  |r1|  0  |r0|  ==  |r1_64|  |r0_64|  */
 					mm_sum = _mm_add_epi64(mm_sum, mm_res);
 				}
 
 				for( ; residual_sample < end; residual_sample++) {
-					mm_res = _mm_cvtsi32_si128(residual[residual_sample]);
-					mm_mask = _mm_srai_epi32(mm_res, 31);
-					mm_res = _mm_xor_si128(mm_res, mm_mask);
-					mm_res = _mm_sub_epi32(mm_res, mm_mask);
+					__m128i mm_res = local_abs_epi32(_mm_cvtsi32_si128(residual[residual_sample]));
 					mm_sum = _mm_add_epi64(mm_sum, mm_res);
 				}
 
 				mm_sum = _mm_add_epi64(mm_sum, _mm_srli_si128(mm_sum, 8));
-				_mm_storel_epi64((__m128i*)(abs_residual_partition_sums+partition), mm_sum);
+				_mm_storel_epi64((__m128i*)(void*)(abs_residual_partition_sums+partition), mm_sum);
 			}
 		}
 	}
 
 	/* now merge partitions for lower orders */
 	{
-		unsigned from_partition = 0, to_partition = partitions;
+		uint32_t from_partition = 0, to_partition = partitions;
 		int partition_order;
 		for(partition_order = (int)max_partition_order - 1; partition_order >= (int)min_partition_order; partition_order--) {
-			unsigned i;
+			uint32_t i;
 			partitions >>= 1;
 			for(i = 0; i < partitions; i++) {
 				abs_residual_partition_sums[to_partition++] =

diff --git a/src/libFLAC/stream_encoder_intrin_ssse3.c b/src/libFLAC/stream_encoder_intrin_ssse3.c
index 2dbd18c..6f21752 100644
--- a/src/libFLAC/stream_encoder_intrin_ssse3.c
+++ b/src/libFLAC/stream_encoder_intrin_ssse3.c

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2000-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -34,8 +34,10 @@
 #  include <config.h>
 #endif
 
+#include "private/cpu.h"
+
 #ifndef FLAC__NO_ASM
-#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
+#if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN
 #include "private/stream_encoder.h"
 #include "private/bitmath.h"
 #ifdef FLAC__SSSE3_SUPPORTED
@@ -46,23 +48,23 @@
 
 FLAC__SSE_TARGET("ssse3")
 void FLAC__precompute_partition_info_sums_intrin_ssse3(const FLAC__int32 residual[], FLAC__uint64 abs_residual_partition_sums[],
-		unsigned residual_samples, unsigned predictor_order, unsigned min_partition_order, unsigned max_partition_order, unsigned bps)
+		uint32_t residual_samples, uint32_t predictor_order, uint32_t min_partition_order, uint32_t max_partition_order, uint32_t bps)
 {
-	const unsigned default_partition_samples = (residual_samples + predictor_order) >> max_partition_order;
-	unsigned partitions = 1u << max_partition_order;
+	const uint32_t default_partition_samples = (residual_samples + predictor_order) >> max_partition_order;
+	uint32_t partitions = 1u << max_partition_order;
 
 	FLAC__ASSERT(default_partition_samples > predictor_order);
 
 	/* first do max_partition_order */
 	{
-		unsigned partition, residual_sample, end = (unsigned)(-(int)predictor_order);
-		unsigned e1, e3;
-		__m128i mm_res, mm_sum;
+		const uint32_t threshold = 32 - FLAC__bitmath_ilog2(default_partition_samples);
+		uint32_t partition, residual_sample, end = (uint32_t)(-(int32_t)predictor_order);
 
-		if(FLAC__bitmath_ilog2(default_partition_samples) + bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < 32) {
+		if(bps + FLAC__MAX_EXTRA_RESIDUAL_BPS < threshold) {
 			for(partition = residual_sample = 0; partition < partitions; partition++) {
+				__m128i mm_sum = _mm_setzero_si128();
+				uint32_t e1, e3;
 				end += default_partition_samples;
-				mm_sum = _mm_setzero_si128();
 
 				e1 = (residual_sample + 3) & ~3; e3 = end & ~3;
 				if(e1 > end)
@@ -70,67 +72,66 @@
 
 				/* assumption: residual[] is properly aligned so (residual + e1) is properly aligned too and _mm_loadu_si128() is fast */
 				for( ; residual_sample < e1; residual_sample++) {
-					mm_res = _mm_cvtsi32_si128(residual[residual_sample]);
-					mm_res = _mm_abs_epi32(mm_res); /* abs(INT_MIN) is undefined, but if the residual is INT_MIN we have bigger problems */
+					__m128i mm_res = _mm_abs_epi32(_mm_cvtsi32_si128(residual[residual_sample]));
 					mm_sum = _mm_add_epi32(mm_sum, mm_res);
 				}
 
 				for( ; residual_sample < e3; residual_sample+=4) {
-					mm_res = _mm_loadu_si128((const __m128i*)(residual+residual_sample));
-					mm_res = _mm_abs_epi32(mm_res);
+					__m128i mm_res = _mm_abs_epi32(_mm_loadu_si128((const __m128i*)(const void*)(residual+residual_sample)));
 					mm_sum = _mm_add_epi32(mm_sum, mm_res);
 				}
 
 				for( ; residual_sample < end; residual_sample++) {
-					mm_res = _mm_cvtsi32_si128(residual[residual_sample]);
-					mm_res = _mm_abs_epi32(mm_res);
+					__m128i mm_res = _mm_abs_epi32(_mm_cvtsi32_si128(residual[residual_sample]));
 					mm_sum = _mm_add_epi32(mm_sum, mm_res);
 				}
 
-				mm_sum = _mm_hadd_epi32(mm_sum, mm_sum);
-				mm_sum = _mm_hadd_epi32(mm_sum, mm_sum);
+				mm_sum = _mm_add_epi32(mm_sum, _mm_shuffle_epi32(mm_sum, _MM_SHUFFLE(1,0,3,2)));
+				mm_sum = _mm_add_epi32(mm_sum, _mm_shufflelo_epi16(mm_sum, _MM_SHUFFLE(1,0,3,2)));
 				abs_residual_partition_sums[partition] = (FLAC__uint32)_mm_cvtsi128_si32(mm_sum);
+/* workaround for MSVC bugs (at least versions 2015 and 2017 are affected) */
+#if (defined _MSC_VER) && (defined FLAC__CPU_X86_64)
+				abs_residual_partition_sums[partition] &= 0xFFFFFFFF;
+#endif
 			}
 		}
 		else { /* have to pessimistically use 64 bits for accumulator */
 			for(partition = residual_sample = 0; partition < partitions; partition++) {
+				__m128i mm_sum = _mm_setzero_si128();
+				uint32_t e1, e3;
 				end += default_partition_samples;
-				mm_sum = _mm_setzero_si128();
 
 				e1 = (residual_sample + 1) & ~1; e3 = end & ~1;
 				FLAC__ASSERT(e1 <= end);
 
 				for( ; residual_sample < e1; residual_sample++) {
-					mm_res = _mm_cvtsi32_si128(residual[residual_sample]); /*  0   0   0   r0 */
-					mm_res = _mm_abs_epi32(mm_res); /*  0   0   0  |r0|  ==   00   |r0_64| */
+					__m128i mm_res = _mm_abs_epi32(_mm_cvtsi32_si128(residual[residual_sample])); /*  0   0   0  |r0|  ==   00   |r0_64| */
 					mm_sum = _mm_add_epi64(mm_sum, mm_res);
 				}
 
 				for( ; residual_sample < e3; residual_sample+=2) {
-					mm_res = _mm_loadl_epi64((const __m128i*)(residual+residual_sample)); /*  0   0   r1  r0 */
-					mm_res = _mm_abs_epi32(mm_res); /*  0   0  |r1|   |r0| */
+					__m128i mm_res = _mm_abs_epi32(_mm_loadl_epi64((const __m128i*)(const void*)(residual+residual_sample))); /*  0   0  |r1|   |r0| */
 					mm_res = _mm_shuffle_epi32(mm_res, _MM_SHUFFLE(3,1,2,0)); /* 0  |r1|  0  |r0|  ==  |r1_64|  |r0_64|  */
 					mm_sum = _mm_add_epi64(mm_sum, mm_res);
 				}
 
 				for( ; residual_sample < end; residual_sample++) {
-					mm_res = _mm_cvtsi32_si128(residual[residual_sample]);
-					mm_res = _mm_abs_epi32(mm_res);
+					__m128i mm_res = _mm_abs_epi32(_mm_cvtsi32_si128(residual[residual_sample]));
 					mm_sum = _mm_add_epi64(mm_sum, mm_res);
 				}
 
 				mm_sum = _mm_add_epi64(mm_sum, _mm_srli_si128(mm_sum, 8));
-				_mm_storel_epi64((__m128i*)(abs_residual_partition_sums+partition), mm_sum);
+				_mm_storel_epi64((__m128i*)(void*)(abs_residual_partition_sums+partition), mm_sum);
 			}
 		}
 	}
 
 	/* now merge partitions for lower orders */
 	{
-		unsigned from_partition = 0, to_partition = partitions;
+		uint32_t from_partition = 0, to_partition = partitions;
 		int partition_order;
 		for(partition_order = (int)max_partition_order - 1; partition_order >= (int)min_partition_order; partition_order--) {
-			unsigned i;
+			uint32_t i;
 			partitions >>= 1;
 			for(i = 0; i < partitions; i++) {
 				abs_residual_partition_sums[to_partition++] =

diff --git a/src/libFLAC/window.c b/src/libFLAC/window.c
index 4387ef7..4ee6f79 100644
--- a/src/libFLAC/window.c
+++ b/src/libFLAC/window.c

@@ -1,6 +1,6 @@
 /* libFLAC - Free Lossless Audio Codec library
  * Copyright (C) 2006-2009  Josh Coalson
- * Copyright (C) 2011-2014  Xiph.Org Foundation
+ * Copyright (C) 2011-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -42,6 +42,10 @@
 
 #ifndef FLAC__INTEGER_ONLY_LIBRARY
 
+#if defined(_MSC_VER)
+// silence 25 instances of MSVC warning C4244: "conversion from 'double' to 'float', possible loss of data"
+#pragma warning ( disable : 4244 )
+#endif
 
 void FLAC__window_bartlett(FLAC__real *window, const FLAC__int32 L)
 {
@@ -68,7 +72,7 @@
 	FLAC__int32 n;
 
 	for (n = 0; n < L; n++)
-		window[n] = (FLAC__real)(0.62f - 0.48f * fabs((float)n/(float)N-0.5f) - 0.38f * cos(2.0f * M_PI * ((float)n/(float)N)));
+		window[n] = (FLAC__real)(0.62f - 0.48f * fabsf((float)n/(float)N-0.5f) - 0.38f * cosf(2.0f * M_PI * ((float)n/(float)N)));
 }
 
 void FLAC__window_blackman(FLAC__real *window, const FLAC__int32 L)
@@ -77,7 +81,7 @@
 	FLAC__int32 n;
 
 	for (n = 0; n < L; n++)
-		window[n] = (FLAC__real)(0.42f - 0.5f * cos(2.0f * M_PI * n / N) + 0.08f * cos(4.0f * M_PI * n / N));
+		window[n] = (FLAC__real)(0.42f - 0.5f * cosf(2.0f * M_PI * n / N) + 0.08f * cosf(4.0f * M_PI * n / N));
 }
 
 /* 4-term -92dB side-lobe */
@@ -87,7 +91,7 @@
 	FLAC__int32 n;
 
 	for (n = 0; n <= N; n++)
-		window[n] = (FLAC__real)(0.35875f - 0.48829f * cos(2.0f * M_PI * n / N) + 0.14128f * cos(4.0f * M_PI * n / N) - 0.01168f * cos(6.0f * M_PI * n / N));
+		window[n] = (FLAC__real)(0.35875f - 0.48829f * cosf(2.0f * M_PI * n / N) + 0.14128f * cosf(4.0f * M_PI * n / N) - 0.01168f * cosf(6.0f * M_PI * n / N));
 }
 
 void FLAC__window_connes(FLAC__real *window, const FLAC__int32 L)
@@ -109,7 +113,7 @@
 	FLAC__int32 n;
 
 	for (n = 0; n < L; n++)
-		window[n] = (FLAC__real)(1.0f - 1.93f * cos(2.0f * M_PI * n / N) + 1.29f * cos(4.0f * M_PI * n / N) - 0.388f * cos(6.0f * M_PI * n / N) + 0.0322f * cos(8.0f * M_PI * n / N));
+		window[n] = (FLAC__real)(0.21557895f - 0.41663158f * cosf(2.0f * M_PI * n / N) + 0.277263158f * cosf(4.0f * M_PI * n / N) - 0.083578947f * cosf(6.0f * M_PI * n / N) + 0.006947368f * cosf(8.0f * M_PI * n / N));
 }
 
 void FLAC__window_gauss(FLAC__real *window, const FLAC__int32 L, const FLAC__real stddev)
@@ -118,9 +122,15 @@
 	const double N2 = (double)N / 2.;
 	FLAC__int32 n;
 
-	for (n = 0; n <= N; n++) {
-		const double k = ((double)n - N2) / (stddev * N2);
-		window[n] = (FLAC__real)exp(-0.5f * k * k);
+	if(!(stddev > 0.0f && stddev <= 0.5f))
+		/* stddev is not between 0 and 0.5, might be NaN.
+		 * Default to 0.25 */
+		FLAC__window_gauss(window, L, 0.25f);
+	else {
+		for (n = 0; n <= N; n++) {
+			const double k = ((double)n - N2) / (stddev * N2);
+			window[n] = (FLAC__real)exp(-0.5f * k * k);
+		}
 	}
 }
 
@@ -130,7 +140,7 @@
 	FLAC__int32 n;
 
 	for (n = 0; n < L; n++)
-		window[n] = (FLAC__real)(0.54f - 0.46f * cos(2.0f * M_PI * n / N));
+		window[n] = (FLAC__real)(0.54f - 0.46f * cosf(2.0f * M_PI * n / N));
 }
 
 void FLAC__window_hann(FLAC__real *window, const FLAC__int32 L)
@@ -139,7 +149,7 @@
 	FLAC__int32 n;
 
 	for (n = 0; n < L; n++)
-		window[n] = (FLAC__real)(0.5f - 0.5f * cos(2.0f * M_PI * n / N));
+		window[n] = (FLAC__real)(0.5f - 0.5f * cosf(2.0f * M_PI * n / N));
 }
 
 void FLAC__window_kaiser_bessel(FLAC__real *window, const FLAC__int32 L)
@@ -148,7 +158,7 @@
 	FLAC__int32 n;
 
 	for (n = 0; n < L; n++)
-		window[n] = (FLAC__real)(0.402f - 0.498f * cos(2.0f * M_PI * n / N) + 0.098f * cos(4.0f * M_PI * n / N) - 0.001f * cos(6.0f * M_PI * n / N));
+		window[n] = (FLAC__real)(0.402f - 0.498f * cosf(2.0f * M_PI * n / N) + 0.098f * cosf(4.0f * M_PI * n / N) - 0.001f * cosf(6.0f * M_PI * n / N));
 }
 
 void FLAC__window_nuttall(FLAC__real *window, const FLAC__int32 L)
@@ -157,7 +167,7 @@
 	FLAC__int32 n;
 
 	for (n = 0; n < L; n++)
-		window[n] = (FLAC__real)(0.3635819f - 0.4891775f*cos(2.0f*M_PI*n/N) + 0.1365995f*cos(4.0f*M_PI*n/N) - 0.0106411f*cos(6.0f*M_PI*n/N));
+		window[n] = (FLAC__real)(0.3635819f - 0.4891775f*cosf(2.0f*M_PI*n/N) + 0.1365995f*cosf(4.0f*M_PI*n/N) - 0.0106411f*cosf(6.0f*M_PI*n/N));
 }
 
 void FLAC__window_rectangle(FLAC__real *window, const FLAC__int32 L)
@@ -192,6 +202,10 @@
 		FLAC__window_rectangle(window, L);
 	else if (p >= 1.0)
 		FLAC__window_hann(window, L);
+	else if (!(p > 0.0f && p < 1.0f))
+		/* p is not between 0 and 1, probably NaN.
+		 * Default to 0.5 */
+		FLAC__window_tukey(window, L, 0.5f);
 	else {
 		const FLAC__int32 Np = (FLAC__int32)(p / 2.0f * L) - 1;
 		FLAC__int32 n;
@@ -200,8 +214,8 @@
 		/* ...replace ends with hann */
 		if (Np > 0) {
 			for (n = 0; n <= Np; n++) {
-				window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * n / Np));
-				window[L-Np-1+n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * (n+Np) / Np));
+				window[n] = (FLAC__real)(0.5f - 0.5f * cosf(M_PI * n / Np));
+				window[L-Np-1+n] = (FLAC__real)(0.5f - 0.5f * cosf(M_PI * (n+Np) / Np));
 			}
 		}
 	}
@@ -218,6 +232,10 @@
 		FLAC__window_partial_tukey(window, L, 0.05f, start, end);
 	else if (p >= 1.0f)
 		FLAC__window_partial_tukey(window, L, 0.95f, start, end);
+	else if (!(p > 0.0f && p < 1.0f))
+		/* p is not between 0 and 1, probably NaN.
+		 * Default to 0.5 */
+		FLAC__window_partial_tukey(window, L, 0.5f, start, end);
 	else {
 
 		Np = (FLAC__int32)(p / 2.0f * N);
@@ -225,11 +243,11 @@
 		for (n = 0; n < start_n && n < L; n++)
 			window[n] = 0.0f;
 		for (i = 1; n < (start_n+Np) && n < L; n++, i++)
-			window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * i / Np));
+			window[n] = (FLAC__real)(0.5f - 0.5f * cosf(M_PI * i / Np));
 		for (; n < (end_n-Np) && n < L; n++)
 			window[n] = 1.0f;
 		for (i = Np; n < end_n && n < L; n++, i--)
-			window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * i / Np));
+			window[n] = (FLAC__real)(0.5f - 0.5f * cosf(M_PI * i / Np));
 		for (; n < L; n++)
 			window[n] = 0.0f;
 	}
@@ -245,25 +263,29 @@
 		FLAC__window_punchout_tukey(window, L, 0.05f, start, end);
 	else if (p >= 1.0f)
 		FLAC__window_punchout_tukey(window, L, 0.95f, start, end);
+	else if (!(p > 0.0f && p < 1.0f))
+		/* p is not between 0 and 1, probably NaN.
+		 * Default to 0.5 */
+		FLAC__window_punchout_tukey(window, L, 0.5f, start, end);
 	else {
 
 		Ns = (FLAC__int32)(p / 2.0f * start_n);
 		Ne = (FLAC__int32)(p / 2.0f * (L - end_n));
 
 		for (n = 0, i = 1; n < Ns && n < L; n++, i++)
-			window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * i / Ns));
+			window[n] = (FLAC__real)(0.5f - 0.5f * cosf(M_PI * i / Ns));
 		for (; n < start_n-Ns && n < L; n++)
 			window[n] = 1.0f;
 		for (i = Ns; n < start_n && n < L; n++, i--)
-			window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * i / Ns));
+			window[n] = (FLAC__real)(0.5f - 0.5f * cosf(M_PI * i / Ns));
 		for (; n < end_n && n < L; n++)
 			window[n] = 0.0f;
 		for (i = 1; n < end_n+Ne && n < L; n++, i++)
-			window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * i / Ne));
+			window[n] = (FLAC__real)(0.5f - 0.5f * cosf(M_PI * i / Ne));
 		for (; n < L - (Ne) && n < L; n++)
 			window[n] = 1.0f;
 		for (i = Ne; n < L; n++, i--)
-			window[n] = (FLAC__real)(0.5f - 0.5f * cos(M_PI * i / Ne));
+			window[n] = (FLAC__real)(0.5f - 0.5f * cosf(M_PI * i / Ne));
 	}
 }
 
@@ -279,4 +301,8 @@
 	}
 }
 
+#if defined(_MSC_VER)
+#pragma warning ( default : 4244 )
+#endif
+
 #endif /* !defined FLAC__INTEGER_ONLY_LIBRARY */

diff --git a/src/share/win_utf8_io/win_utf8_io.c b/src/share/win_utf8_io/win_utf8_io.c
index dd22d5b..65b5699 100644
--- a/src/share/win_utf8_io/win_utf8_io.c
+++ b/src/share/win_utf8_io/win_utf8_io.c

@@ -1,5 +1,5 @@
 /* libFLAC - Free Lossless Audio Codec library
- * Copyright (C) 2013-2014  Xiph.Org Foundation
+ * Copyright (C) 2013-2022  Xiph.Org Foundation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -33,52 +33,48 @@
 #  include <config.h>
 #endif
 
-#include <stdio.h>
-#include <sys/stat.h>
-#include <sys/utime.h>
 #include <io.h>
-#include <stdlib.h>
-#include <string.h>
-#include <stdarg.h>
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h> /* for WideCharToMultiByte and MultiByteToWideChar */
-
+#include <windows.h>
 #include "share/win_utf8_io.h"
 
 #define UTF8_BUFFER_SIZE 32768
 
-static
-int local_vsnprintf(char *str, size_t size, const char *fmt, va_list va)
+#if !defined(WINAPI_FAMILY_PARTITION)
+#define WINAPI_FAMILY_PARTITION(x) x
+#define WINAPI_PARTITION_DESKTOP 1
+#endif
+
+static int local_vsnprintf(char *str, size_t size, const char *fmt, va_list va)
 {
 	int rc;
 
 #if defined _MSC_VER
 	if (size == 0)
 		return 1024;
-	rc = vsnprintf_s (str, size, _TRUNCATE, fmt, va);
+	rc = vsnprintf_s(str, size, _TRUNCATE, fmt, va);
 	if (rc < 0)
 		rc = size - 1;
 #elif defined __MINGW32__
-	rc = __mingw_vsnprintf (str, size, fmt, va);
+	rc = __mingw_vsnprintf(str, size, fmt, va);
 #else
-	rc = vsnprintf (str, size, fmt, va);
+	rc = vsnprintf(str, size, fmt, va);
 #endif
 
 	return rc;
 }
 
-static UINT win_utf8_io_codepage = CP_ACP;
-
 /* convert WCHAR stored Unicode string to UTF-8. Caller is responsible for freeing memory */
-static
-char *utf8_from_wchar(const wchar_t *wstr)
+static char *utf8_from_wchar(const wchar_t *wstr)
 {
 	char *utf8str;
 	int len;
 
-	if (!wstr) return NULL;
-	if ((len = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, NULL, 0, NULL, NULL)) == 0) return NULL;
-	if ((utf8str = (char *)malloc(++len)) == NULL) return NULL;
+	if (!wstr)
+		return NULL;
+	if ((len = WideCharToMultiByte(CP_UTF8, 0, wstr, -1, NULL, 0, NULL, NULL)) == 0)
+		return NULL;
+	if ((utf8str = (char *)malloc(len)) == NULL)
+		return NULL;
 	if (WideCharToMultiByte(CP_UTF8, 0, wstr, -1, utf8str, len, NULL, NULL) == 0) {
 		free(utf8str);
 		utf8str = NULL;
@@ -88,21 +84,20 @@
 }
 
 /* convert UTF-8 back to WCHAR. Caller is responsible for freeing memory */
-static
-wchar_t *wchar_from_utf8(const char *str)
+static wchar_t *wchar_from_utf8(const char *str)
 {
 	wchar_t *widestr;
 	int len;
 
-	if (!str) return NULL;
-	len=(int)strlen(str)+1;
-	if ((widestr = (wchar_t *)malloc(len*sizeof(wchar_t))) != NULL) {
-		if (MultiByteToWideChar(win_utf8_io_codepage, 0, str, len, widestr, len) == 0) {
-			if (MultiByteToWideChar(CP_ACP, 0, str, len, widestr, len) == 0) { /* try conversion from Ansi in case the initial UTF-8 conversion had failed */
-				free(widestr);
-				widestr = NULL;
-			}
-		}
+	if (!str)
+		return NULL;
+	if ((len = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0)) == 0)
+		return NULL;
+	if ((widestr = (wchar_t *)malloc(len*sizeof(wchar_t))) == NULL)
+		return NULL;
+	if (MultiByteToWideChar(CP_UTF8, 0, str, -1, widestr, len) == 0) {
+		free(widestr);
+		widestr = NULL;
 	}
 
 	return widestr;
@@ -120,14 +115,23 @@
 	char **utf8argv;
 	int ret, i;
 
-	if ((handle = LoadLibraryA("msvcrt.dll")) == NULL) return 1;
-	if ((wgetmainargs = (wgetmainargs_t)GetProcAddress(handle, "__wgetmainargs")) == NULL) return 1;
+	if ((handle = LoadLibraryW(L"msvcrt.dll")) == NULL) return 1;
+	if ((wgetmainargs = (wgetmainargs_t)GetProcAddress(handle, "__wgetmainargs")) == NULL) {
+		FreeLibrary(handle);
+		return 1;
+	}
 	i = 0;
-	/* if __wgetmainargs expands wildcards then it also erroneously converts \\?\c:\path\to\file.flac to \\file.flac */
-	if (wgetmainargs(&wargc, &wargv, &wenv, 1, &i) != 0) return 1;
-	if ((utf8argv = (char **)calloc(wargc, sizeof(char*))) == NULL) return 1;
-	ret = 0;
+	/* when the 4th argument is 1, __wgetmainargs expands wildcards but also erroneously converts \\?\c:\path\to\file.flac to \\file.flac */
+	if (wgetmainargs(&wargc, &wargv, &wenv, 1, &i) != 0) {
+		FreeLibrary(handle);
+		return 1;
+	}
+	if ((utf8argv = (char **)calloc(wargc, sizeof(char*))) == NULL) {
+		FreeLibrary(handle);
+		return 1;
+	}
 
+	ret = 0;
 	for (i=0; i<wargc; i++) {
 		if ((utf8argv[i] = utf8_from_wchar(wargv[i])) == NULL) {
 			ret = 1;
@@ -135,10 +139,9 @@
 		}
 	}
 
-	FreeLibrary(handle);
+	FreeLibrary(handle); /* do not free it when wargv or wenv are still in use */
 
 	if (ret == 0) {
-		win_utf8_io_codepage = CP_UTF8;
 		*argc = wargc;
 		*argv = utf8argv;
 	} else {
@@ -150,95 +153,112 @@
 	return ret;
 }
 
+/* similar to CreateFileW but accepts UTF-8 encoded lpFileName */
+HANDLE WINAPI CreateFile_utf8(const char *lpFileName, DWORD dwDesiredAccess, DWORD dwShareMode, LPSECURITY_ATTRIBUTES lpSecurityAttributes, DWORD dwCreationDisposition, DWORD dwFlagsAndAttributes, HANDLE hTemplateFile)
+{
+	wchar_t *wname;
+	HANDLE handle = INVALID_HANDLE_VALUE;
+
+	if ((wname = wchar_from_utf8(lpFileName)) != NULL) {
+#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+		handle = CreateFileW(wname, dwDesiredAccess, dwShareMode, lpSecurityAttributes, dwCreationDisposition, dwFlagsAndAttributes, hTemplateFile);
+#else // !WINAPI_PARTITION_DESKTOP
+		CREATEFILE2_EXTENDED_PARAMETERS params;
+		params.dwSize = sizeof(params);
+		params.dwFileAttributes = dwFlagsAndAttributes & 0xFFFF;
+		params.dwFileFlags = dwFlagsAndAttributes & 0xFFF00000;
+		params.dwSecurityQosFlags = dwFlagsAndAttributes & 0x000F0000;
+		params.lpSecurityAttributes = lpSecurityAttributes;
+		params.hTemplateFile = hTemplateFile;
+		handle = CreateFile2(wname, dwDesiredAccess, dwShareMode, dwCreationDisposition, &params);
+#endif // !WINAPI_PARTITION_DESKTOP
+		free(wname);
+	}
+
+	return handle;
+}
+
 /* return number of characters in the UTF-8 string */
 size_t strlen_utf8(const char *str)
 {
 	size_t len;
-	if ((len = MultiByteToWideChar(win_utf8_io_codepage, 0, str, -1, NULL, 0)) == 0)
-		len = strlen(str);
-	return len;
+	len = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0); /* includes terminating null */
+	if (len != 0)
+		return len-1;
+	else
+		return strlen(str);
 }
 
 /* get the console width in characters */
 int win_get_console_width(void)
 {
 	int width = 80;
+#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
 	CONSOLE_SCREEN_BUFFER_INFO csbi;
 	HANDLE hOut = GetStdHandle(STD_OUTPUT_HANDLE);
-	if (GetConsoleScreenBufferInfo(hOut, &csbi) != 0) width = csbi.dwSize.X;
+	if(hOut != INVALID_HANDLE_VALUE && hOut != NULL)
+		if (GetConsoleScreenBufferInfo(hOut, &csbi) != 0)
+			width = csbi.dwSize.X;
+#endif // WINAPI_PARTITION_DESKTOP
 	return width;
 }
 
 /* print functions */
 
-int print_console(FILE *stream, const wchar_t *text, size_t len)
+#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+static int wprint_console(FILE *stream, const wchar_t *text, size_t len)
 {
-	static HANDLE hOut;
-	static HANDLE hErr;
 	DWORD out;
-	hOut = GetStdHandle(STD_OUTPUT_HANDLE);
-	hErr = GetStdHandle(STD_ERROR_HANDLE);
-	if (stream == stdout && hOut != INVALID_HANDLE_VALUE && GetFileType(hOut) == FILE_TYPE_CHAR) {
-		if (WriteConsoleW(hOut, text, len, &out, NULL) == 0) return -1;
-		return out;
-	} else if (stream == stderr && hErr != INVALID_HANDLE_VALUE && GetFileType(hErr) == FILE_TYPE_CHAR) {
-		if (WriteConsoleW(hErr, text, len, &out, NULL) == 0) return -1;
-		return out;
-	} else {
-		int ret = fputws(text, stream);
-		if (ret < 0) return ret;
-		return len;
-	}
+	int ret;
+
+	do {
+		if (stream == stdout) {
+			HANDLE hOut = GetStdHandle(STD_OUTPUT_HANDLE);
+			if (hOut == INVALID_HANDLE_VALUE || hOut == NULL || GetFileType(hOut) != FILE_TYPE_CHAR)
+				break;
+			if (WriteConsoleW(hOut, text, len, &out, NULL) == 0)
+				return -1;
+			return out;
+		}
+		if (stream == stderr) {
+			HANDLE hErr = GetStdHandle(STD_ERROR_HANDLE);
+			if (hErr == INVALID_HANDLE_VALUE || hErr == NULL || GetFileType(hErr) != FILE_TYPE_CHAR)
+				break;
+			if (WriteConsoleW(hErr, text, len, &out, NULL) == 0)
+				return -1;
+			return out;
+		}
+	} while(0);
+
+	ret = fputws(text, stream);
+	if (ret < 0)
+		return ret;
+	return len;
 }
+#endif // WINAPI_PARTITION_DESKTOP
 
 int printf_utf8(const char *format, ...)
 {
-	char *utmp = NULL;
-	wchar_t *wout = NULL;
-	int ret = -1;
+	int ret;
+	va_list argptr;
+	va_start(argptr, format);
 
-	while (1) {
-		va_list argptr;
-		if (!(utmp = (char *)malloc(UTF8_BUFFER_SIZE*sizeof(char)))) break;
-		va_start(argptr, format);
-		ret = local_vsnprintf(utmp, UTF8_BUFFER_SIZE, format, argptr);
-		va_end(argptr);
-		if (ret < 0) break;
-		if (!(wout = wchar_from_utf8(utmp))) {
-			ret = -1;
-			break;
-		}
-		ret = print_console(stdout, wout, wcslen(wout));
-		break;
-	}
-	if (utmp) free(utmp);
-	if (wout) free(wout);
+	ret = vfprintf_utf8(stdout, format, argptr);
+
+	va_end(argptr);
 
 	return ret;
 }
 
 int fprintf_utf8(FILE *stream, const char *format, ...)
 {
-	char *utmp = NULL;
-	wchar_t *wout = NULL;
-	int ret = -1;
+	int ret;
+	va_list argptr;
+	va_start(argptr, format);
 
-	while (1) {
-		va_list argptr;
-		if (!(utmp = (char *)malloc(UTF8_BUFFER_SIZE*sizeof(char)))) break;
-		va_start(argptr, format);
-		ret = local_vsnprintf(utmp, UTF8_BUFFER_SIZE, format, argptr);
-		va_end(argptr);
-		if (ret < 0) break;
-		if (!(wout = wchar_from_utf8(utmp))) {
-			ret = -1;
-			break;
-		}
-		ret = print_console(stream, wout, wcslen(wout));
-		break;
-	}
-	if (utmp) free(utmp);
-	if (wout) free(wout);
+	ret = vfprintf_utf8(stream, format, argptr);
+
+	va_end(argptr);
 
 	return ret;
 }
@@ -249,43 +269,48 @@
 	wchar_t *wout = NULL;
 	int ret = -1;
 
-	while (1) {
-		if (!(utmp = (char *)malloc(UTF8_BUFFER_SIZE*sizeof(char)))) break;
-		if ((ret = local_vsnprintf(utmp, UTF8_BUFFER_SIZE, format, argptr)) < 0) break;
+	do {
+		if (!(utmp = (char *)malloc(UTF8_BUFFER_SIZE))) break;
+		if ((ret = local_vsnprintf(utmp, UTF8_BUFFER_SIZE, format, argptr)) <= 0) break;
 		if (!(wout = wchar_from_utf8(utmp))) {
 			ret = -1;
 			break;
 		}
-		ret = print_console(stream, wout, wcslen(wout));
-		break;
-	}
-	if (utmp) free(utmp);
-	if (wout) free(wout);
+#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
+		ret = wprint_console(stream, wout, wcslen(wout));
+#else // !WINAPI_PARTITION_DESKTOP
+		OutputDebugStringW(wout);
+		ret = 0;
+#endif // !WINAPI_PARTITION_DESKTOP
+	} while(0);
+
+	free(utmp);
+	free(wout);
 
 	return ret;
 }
 
 /* file functions */
 
-FILE *fopen_utf8(const char *filename, const char *mode)
+FILE* fopen_utf8(const char *filename, const char *mode)
 {
 	wchar_t *wname = NULL;
 	wchar_t *wmode = NULL;
 	FILE *f = NULL;
 
-	while (1) {
+	do {
 		if (!(wname = wchar_from_utf8(filename))) break;
 		if (!(wmode = wchar_from_utf8(mode))) break;
 		f = _wfopen(wname, wmode);
-		break;
-	}
-	if (wname) free(wname);
-	if (wmode) free(wmode);
+	} while(0);
+
+	free(wname);
+	free(wmode);
 
 	return f;
 }
 
-int _stat64_utf8(const char *path, struct __stat64 *buffer)
+int stat64_utf8(const char *path, struct __stat64 *buffer)
 {
 	wchar_t *wpath;
 	int ret;
@@ -315,14 +340,9 @@
 	struct __utimbuf64 ut;
 	int ret;
 
-	if (sizeof(*times) == sizeof(ut)) {
-		memcpy(&ut, times, sizeof(ut));
-	} else {
-		ut.actime = times->actime;
-		ut.modtime = times->modtime;
-	}
-
 	if (!(wname = wchar_from_utf8(filename))) return -1;
+	ut.actime = times->actime;
+	ut.modtime = times->modtime;
 	ret = _wutime64(wname, &ut);
 	free(wname);
 
@@ -347,27 +367,14 @@
 	wchar_t *wnew = NULL;
 	int ret = -1;
 
-	while (1) {
+	do {
 		if (!(wold = wchar_from_utf8(oldname))) break;
 		if (!(wnew = wchar_from_utf8(newname))) break;
 		ret = _wrename(wold, wnew);
-		break;
-	}
-	if (wold) free(wold);
-	if (wnew) free(wnew);
+	} while(0);
+
+	free(wold);
+	free(wnew);
 
 	return ret;
 }
-
-HANDLE WINAPI CreateFile_utf8(const char *lpFileName, DWORD dwDesiredAccess, DWORD dwShareMode, LPSECURITY_ATTRIBUTES lpSecurityAttributes, DWORD dwCreationDisposition, DWORD dwFlagsAndAttributes, HANDLE hTemplateFile)
-{
-	wchar_t *wname;
-	HANDLE handle = INVALID_HANDLE_VALUE;
-
-	if ((wname = wchar_from_utf8(lpFileName)) != NULL) {
-		handle = CreateFileW(wname, dwDesiredAccess, dwShareMode, lpSecurityAttributes, dwCreationDisposition, dwFlagsAndAttributes, hTemplateFile);
-		free(wname);
-	}
-
-	return handle;
-}
commit	f0889bc25fd0d154ea88841c4061a99e3113d8d7	[log] [tgz]
author	Dale Curtis <dalecurtis@chromium.org>	Tue Jan 24 11:08:24 2023 -0800
committer	Dale Curtis <dalecurtis@chromium.org>	Tue Jan 24 19:09:33 2023 +0000
tree	eabf5b3f1e35bbcd601343318583bdefa8825ae4
parent	1222ddf5718c561df4376ac8a660b50a13aa1ff5 [diff]