Merge pull request #22344 from ocpalo:libjpegturbo2.1.3

d09cc0f3 · Alexander Smorkalov · bb71cb20 · 06431655 · d09cc0f3 · d09cc0f3
196 changed file
--- a/3rdparty/libjpeg-turbo/CMakeLists.txt
+++ b/3rdparty/libjpeg-turbo/CMakeLists.txt
@@ -4,9 +4,9 @@ ocv_warnings_disable(CMAKE_C_FLAGS -Wunused-parameter -Wsign-compare -Wshorten-6

 set(VERSION_MAJOR 2)
 set(VERSION_MINOR 1)
-set(VERSION_REVISION 2)
+set(VERSION_REVISION 3)
 set(VERSION ${VERSION_MAJOR}.${VERSION_MINOR}.${VERSION_REVISION})
-set(LIBJPEG_TURBO_VERSION_NUMBER 2001002)
+set(LIBJPEG_TURBO_VERSION_NUMBER 2001003)

 string(TIMESTAMP BUILD "opencv-${OPENCV_VERSION}-libjpeg-turbo")
 if(CMAKE_BUILD_TYPE STREQUAL "Debug")
@@ -79,14 +79,13 @@ configure_file(jconfigint.h.in jconfigint.h)

 include_directories(${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/src)

-set(JPEG_SOURCES
-  jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c jcicc.c
-  jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c jcphuff.c
-  jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c jdatadst.c jdatasrc.c
-  jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c jdicc.c jdinput.c jdmainct.c jdmarker.c
-  jdmaster.c jdmerge.c jdphuff.c jdpostct.c jdsample.c jdtrans.c jerror.c
-  jfdctflt.c jfdctfst.c jfdctint.c jidctflt.c jidctfst.c jidctint.c jidctred.c
-  jquant1.c jquant2.c jutils.c jmemmgr.c jmemnobs.c)
+set(JPEG_SOURCES jcapimin.c jcapistd.c jccoefct.c jccolor.c jcdctmgr.c jchuff.c
+        jcicc.c jcinit.c jcmainct.c jcmarker.c jcmaster.c jcomapi.c jcparam.c
+        jcphuff.c jcprepct.c jcsample.c jctrans.c jdapimin.c jdapistd.c jdatadst.c
+        jdatasrc.c jdcoefct.c jdcolor.c jddctmgr.c jdhuff.c jdicc.c jdinput.c
+        jdmainct.c jdmarker.c jdmaster.c jdmerge.c jdphuff.c jdpostct.c jdsample.c
+        jdtrans.c jerror.c jfdctflt.c jfdctfst.c jfdctint.c jidctflt.c jidctfst.c
+        jidctint.c jidctred.c jquant1.c jquant2.c jutils.c jmemmgr.c jmemnobs.c)

 if(WITH_ARITH_ENC OR WITH_ARITH_DEC)
  set(JPEG_SOURCES ${JPEG_SOURCES} jaricom.c)

--- a/3rdparty/libjpeg-turbo/LICENSE.md
+++ b/3rdparty/libjpeg-turbo/LICENSE.md
@@ -91,7 +91,7 @@ best of our understanding.
 The Modified (3-clause) BSD License
 ===================================

-Copyright (C)2009-2021 D. R. Commander.  All Rights Reserved.<br>
+Copyright (C)2009-2022 D. R. Commander.  All Rights Reserved.<br>
 Copyright (C)2015 Viktor Szathmáry.  All Rights Reserved.

 Redistribution and use in source and binary forms, with or without

--- a/3rdparty/libjpeg-turbo/src/jcapimin.c
+++ b/3rdparty/libjpeg-turbo/src/jcapimin.c
@@ -4,8 +4,8 @@
 * This file was part of the Independent JPEG Group's software:
 * Copyright (C) 1994-1998, Thomas G. Lane.
 * Modified 2003-2010 by Guido Vollbeding.
- * It was modified by The libjpeg-turbo Project to include only code relevant
- * to libjpeg-turbo.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@@ -52,7 +52,7 @@ jpeg_CreateCompress(j_compress_ptr cinfo, int version, size_t structsize)
  {
    struct jpeg_error_mgr *err = cinfo->err;
    void *client_data = cinfo->client_data; /* ignore Purify complaint here */
-    MEMZERO(cinfo, sizeof(struct jpeg_compress_struct));
+    memset(cinfo, 0, sizeof(struct jpeg_compress_struct));
    cinfo->err = err;
    cinfo->client_data = client_data;
  }

--- a/3rdparty/libjpeg-turbo/src/jcarith.c
+++ b/3rdparty/libjpeg-turbo/src/jcarith.c
@@ -4,7 +4,7 @@
 * This file was part of the Independent JPEG Group's software:
 * Developed 1997-2009 by Guido Vollbeding.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2015, 2018, D. R. Commander.
+ * Copyright (C) 2015, 2018, 2021-2022, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@@ -338,14 +338,14 @@ emit_restart(j_compress_ptr cinfo, int restart_num)
    compptr = cinfo->cur_comp_info[ci];
    /* DC needs no table for refinement scan */
    if (cinfo->progressive_mode == 0 || (cinfo->Ss == 0 && cinfo->Ah == 0)) {
-      MEMZERO(entropy->dc_stats[compptr->dc_tbl_no], DC_STAT_BINS);
+      memset(entropy->dc_stats[compptr->dc_tbl_no], 0, DC_STAT_BINS);
      /* Reset DC predictions to 0 */
      entropy->last_dc_val[ci] = 0;
      entropy->dc_context[ci] = 0;
    }
    /* AC needs no table when not present */
    if (cinfo->progressive_mode == 0 || cinfo->Se) {
-      MEMZERO(entropy->ac_stats[compptr->ac_tbl_no], AC_STAT_BINS);
+      memset(entropy->ac_stats[compptr->ac_tbl_no], 0, AC_STAT_BINS);
    }
  }

@@ -836,7 +836,7 @@ start_pass(j_compress_ptr cinfo, boolean gather_statistics)
     * We are fully adaptive here and need no extra
     * statistics gathering pass!
     */
-    ERREXIT(cinfo, JERR_NOT_COMPILED);
+    ERREXIT(cinfo, JERR_NOTIMPL);

  /* We assume jcmaster.c already validated the progressive scan parameters. */

@@ -867,7 +867,7 @@ start_pass(j_compress_ptr cinfo, boolean gather_statistics)
      if (entropy->dc_stats[tbl] == NULL)
        entropy->dc_stats[tbl] = (unsigned char *)(*cinfo->mem->alloc_small)
          ((j_common_ptr)cinfo, JPOOL_IMAGE, DC_STAT_BINS);
-      MEMZERO(entropy->dc_stats[tbl], DC_STAT_BINS);
+      memset(entropy->dc_stats[tbl], 0, DC_STAT_BINS);
      /* Initialize DC predictions to 0 */
      entropy->last_dc_val[ci] = 0;
      entropy->dc_context[ci] = 0;
@@ -880,7 +880,7 @@ start_pass(j_compress_ptr cinfo, boolean gather_statistics)
      if (entropy->ac_stats[tbl] == NULL)
        entropy->ac_stats[tbl] = (unsigned char *)(*cinfo->mem->alloc_small)
          ((j_common_ptr)cinfo, JPOOL_IMAGE, AC_STAT_BINS);
-      MEMZERO(entropy->ac_stats[tbl], AC_STAT_BINS);
+      memset(entropy->ac_stats[tbl], 0, AC_STAT_BINS);
 #ifdef CALCULATE_SPECTRAL_CONDITIONING
      if (cinfo->progressive_mode)
        /* Section G.1.3.2: Set appropriate arithmetic conditioning value Kx */

--- a/3rdparty/libjpeg-turbo/src/jchuff.c
+++ b/3rdparty/libjpeg-turbo/src/jchuff.c
@@ -4,7 +4,7 @@
 * This file was part of the Independent JPEG Group's software:
 * Copyright (C) 1991-1997, Thomas G. Lane.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2009-2011, 2014-2016, 2018-2021, D. R. Commander.
+ * Copyright (C) 2009-2011, 2014-2016, 2018-2022, D. R. Commander.
 * Copyright (C) 2015, Matthieu Darbois.
 * Copyright (C) 2018, Matthias Räncker.
 * Copyright (C) 2020, Arm Limited.
@@ -200,12 +200,12 @@ start_pass_huff(j_compress_ptr cinfo, boolean gather_statistics)
        entropy->dc_count_ptrs[dctbl] = (long *)
          (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                      257 * sizeof(long));
-      MEMZERO(entropy->dc_count_ptrs[dctbl], 257 * sizeof(long));
+      memset(entropy->dc_count_ptrs[dctbl], 0, 257 * sizeof(long));
      if (entropy->ac_count_ptrs[actbl] == NULL)
        entropy->ac_count_ptrs[actbl] = (long *)
          (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                      257 * sizeof(long));
-      MEMZERO(entropy->ac_count_ptrs[actbl], 257 * sizeof(long));
+      memset(entropy->ac_count_ptrs[actbl], 0, 257 * sizeof(long));
 #endif
    } else {
      /* Compute derived values for Huffman tables */
@@ -315,8 +315,8 @@ jpeg_make_c_derived_tbl(j_compress_ptr cinfo, boolean isDC, int tblno,
   * this lets us detect duplicate VAL entries here, and later
   * allows emit_bits to detect any attempt to emit such symbols.
   */
-  MEMZERO(dtbl->ehufco, sizeof(dtbl->ehufco));
-  MEMZERO(dtbl->ehufsi, sizeof(dtbl->ehufsi));
+  memset(dtbl->ehufco, 0, sizeof(dtbl->ehufco));
+  memset(dtbl->ehufsi, 0, sizeof(dtbl->ehufsi));

  /* This is also a convenient place to check for out-of-range
   * and duplicated VAL entries.  We allow 0..255 for AC symbols
@@ -478,7 +478,7 @@ dump_buffer(working_state *state)
    buffer = _buffer; \
    while (bytes > 0) { \
      bytestocopy = MIN(bytes, state->free_in_buffer); \
-      MEMCOPY(state->next_output_byte, buffer, bytestocopy); \
+      memcpy(state->next_output_byte, buffer, bytestocopy); \
      state->next_output_byte += bytestocopy; \
      buffer += bytestocopy; \
      state->free_in_buffer -= bytestocopy; \
@@ -941,8 +941,8 @@ jpeg_gen_optimal_table(j_compress_ptr cinfo, JHUFF_TBL *htbl, long freq[])

  /* This algorithm is explained in section K.2 of the JPEG standard */

-  MEMZERO(bits, sizeof(bits));
-  MEMZERO(codesize, sizeof(codesize));
+  memset(bits, 0, sizeof(bits));
+  memset(codesize, 0, sizeof(codesize));
  for (i = 0; i < 257; i++)
    others[i] = -1;             /* init links to empty */

@@ -1044,7 +1044,7 @@ jpeg_gen_optimal_table(j_compress_ptr cinfo, JHUFF_TBL *htbl, long freq[])
  bits[i]--;

  /* Return final symbol counts (only for lengths 0..16) */
-  MEMCOPY(htbl->bits, bits, sizeof(htbl->bits));
+  memcpy(htbl->bits, bits, sizeof(htbl->bits));

  /* Return a list of the symbols sorted by code length */
  /* It's not real clear to me why we don't need to consider the codelength
@@ -1083,8 +1083,8 @@ finish_pass_gather(j_compress_ptr cinfo)
  /* It's important not to apply jpeg_gen_optimal_table more than once
   * per table, because it clobbers the input frequency counts!
   */
-  MEMZERO(did_dc, sizeof(did_dc));
-  MEMZERO(did_ac, sizeof(did_ac));
+  memset(did_dc, 0, sizeof(did_dc));
+  memset(did_ac, 0, sizeof(did_ac));

  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
    compptr = cinfo->cur_comp_info[ci];

--- a/3rdparty/libjpeg-turbo/src/jcphuff.c
+++ b/3rdparty/libjpeg-turbo/src/jcphuff.c
@@ -4,7 +4,7 @@
 * This file was part of the Independent JPEG Group's software:
 * Copyright (C) 1995-1997, Thomas G. Lane.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2011, 2015, 2018, 2021, D. R. Commander.
+ * Copyright (C) 2011, 2015, 2018, 2021-2022, D. R. Commander.
 * Copyright (C) 2016, 2018, Matthieu Darbois.
 * Copyright (C) 2020, Arm Limited.
 * Copyright (C) 2021, Alex Richardson.
@@ -275,7 +275,7 @@ start_pass_phuff(j_compress_ptr cinfo, boolean gather_statistics)
        entropy->count_ptrs[tbl] = (long *)
          (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                      257 * sizeof(long));
-      MEMZERO(entropy->count_ptrs[tbl], 257 * sizeof(long));
+      memset(entropy->count_ptrs[tbl], 0, 257 * sizeof(long));
    } else {
      /* Compute derived values for Huffman table */
      /* We may do this more than once for a table, but it's not expensive */
@@ -584,8 +584,8 @@ encode_mcu_DC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
      continue; \
    /* For a negative coef, want temp2 = bitwise complement of abs(coef) */ \
    temp2 ^= temp; \
-    values[k] = temp; \
-    values[k + DCTSIZE2] = temp2; \
+    values[k] = (JCOEF)temp; \
+    values[k + DCTSIZE2] = (JCOEF)temp2; \
    zerobits |= ((size_t)1U) << k; \
  } \
 }
@@ -1062,7 +1062,7 @@ finish_pass_gather_phuff(j_compress_ptr cinfo)
  /* It's important not to apply jpeg_gen_optimal_table more than once
   * per table, because it clobbers the input frequency counts!
   */
-  MEMZERO(did, sizeof(did));
+  memset(did, 0, sizeof(did));

  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
    compptr = cinfo->cur_comp_info[ci];

--- a/3rdparty/libjpeg-turbo/src/jcprepct.c
+++ b/3rdparty/libjpeg-turbo/src/jcprepct.c
@@ -3,8 +3,8 @@
 *
 * This file is part of the Independent JPEG Group's software:
 * Copyright (C) 1994-1996, Thomas G. Lane.
- * It was modified by The libjpeg-turbo Project to include only code relevant
- * to libjpeg-turbo.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@@ -289,8 +289,8 @@ create_context_buffer(j_compress_ptr cinfo)
                     cinfo->max_h_samp_factor) / compptr->h_samp_factor),
       (JDIMENSION)(3 * rgroup_height));
    /* Copy true buffer row pointers into the middle of the fake row array */
-    MEMCOPY(fake_buffer + rgroup_height, true_buffer,
-            3 * rgroup_height * sizeof(JSAMPROW));
+    memcpy(fake_buffer + rgroup_height, true_buffer,
+           3 * rgroup_height * sizeof(JSAMPROW));
    /* Fill in the above and below wraparound pointers */
    for (i = 0; i < rgroup_height; i++) {
      fake_buffer[i] = true_buffer[2 * rgroup_height + i];

--- a/3rdparty/libjpeg-turbo/src/jctrans.c
+++ b/3rdparty/libjpeg-turbo/src/jctrans.c
@@ -5,7 +5,7 @@
 * Copyright (C) 1995-1998, Thomas G. Lane.
 * Modified 2000-2009 by Guido Vollbeding.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2020, D. R. Commander.
+ * Copyright (C) 2020, 2022, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@@ -100,8 +100,8 @@ jpeg_copy_critical_parameters(j_decompress_ptr srcinfo, j_compress_ptr dstinfo)
      qtblptr = &dstinfo->quant_tbl_ptrs[tblno];
      if (*qtblptr == NULL)
        *qtblptr = jpeg_alloc_quant_table((j_common_ptr)dstinfo);
-      MEMCOPY((*qtblptr)->quantval, srcinfo->quant_tbl_ptrs[tblno]->quantval,
-              sizeof((*qtblptr)->quantval));
+      memcpy((*qtblptr)->quantval, srcinfo->quant_tbl_ptrs[tblno]->quantval,
+             sizeof((*qtblptr)->quantval));
      (*qtblptr)->sent_table = FALSE;
    }
  }

--- a/3rdparty/libjpeg-turbo/src/jdapimin.c
+++ b/3rdparty/libjpeg-turbo/src/jdapimin.c
@@ -4,7 +4,7 @@
 * This file was part of the Independent JPEG Group's software:
 * Copyright (C) 1994-1998, Thomas G. Lane.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2016, D. R. Commander.
+ * Copyright (C) 2016, 2022, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@@ -53,7 +53,7 @@ jpeg_CreateDecompress(j_decompress_ptr cinfo, int version, size_t structsize)
  {
    struct jpeg_error_mgr *err = cinfo->err;
    void *client_data = cinfo->client_data; /* ignore Purify complaint here */
-    MEMZERO(cinfo, sizeof(struct jpeg_decompress_struct));
+    memset(cinfo, 0, sizeof(struct jpeg_decompress_struct));
    cinfo->err = err;
    cinfo->client_data = client_data;
  }
@@ -92,7 +92,7 @@ jpeg_CreateDecompress(j_decompress_ptr cinfo, int version, size_t structsize)
  cinfo->master = (struct jpeg_decomp_master *)
    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT,
                                sizeof(my_decomp_master));
-  MEMZERO(cinfo->master, sizeof(my_decomp_master));
+  memset(cinfo->master, 0, sizeof(my_decomp_master));
 }



--- a/3rdparty/libjpeg-turbo/src/jdapistd.c
+++ b/3rdparty/libjpeg-turbo/src/jdapistd.c
@@ -4,7 +4,7 @@
 * This file was part of the Independent JPEG Group's software:
 * Copyright (C) 1994-1996, Thomas G. Lane.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2010, 2015-2020, D. R. Commander.
+ * Copyright (C) 2010, 2015-2020, 2022, D. R. Commander.
 * Copyright (C) 2015, Google, Inc.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
@@ -159,6 +159,7 @@ jpeg_crop_scanline(j_decompress_ptr cinfo, JDIMENSION *xoffset,
  JDIMENSION input_xoffset;
  boolean reinit_upsampler = FALSE;
  jpeg_component_info *compptr;
+  my_master_ptr master = (my_master_ptr)cinfo->master;

  if (cinfo->global_state != DSTATE_SCANNING || cinfo->output_scanline != 0)
    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
@@ -208,6 +209,11 @@ jpeg_crop_scanline(j_decompress_ptr cinfo, JDIMENSION *xoffset,
   */
  *width = *width + input_xoffset - *xoffset;
  cinfo->output_width = *width;
+  if (master->using_merged_upsample && cinfo->max_v_samp_factor == 2) {
+    my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
+    upsample->out_row_width =
+      cinfo->output_width * cinfo->out_color_components;
+  }

  /* Set the first and last iMCU columns that we must decompress.  These values
   * will be used in single-scan decompressions.

--- a/3rdparty/libjpeg-turbo/src/jdarith.c
+++ b/3rdparty/libjpeg-turbo/src/jdarith.c
@@ -4,7 +4,7 @@
 * This file was part of the Independent JPEG Group's software:
 * Developed 1997-2015 by Guido Vollbeding.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2015-2020, D. R. Commander.
+ * Copyright (C) 2015-2020, 2022, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@@ -210,13 +210,13 @@ process_restart(j_decompress_ptr cinfo)
  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
    compptr = cinfo->cur_comp_info[ci];
    if (!cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0)) {
-      MEMZERO(entropy->dc_stats[compptr->dc_tbl_no], DC_STAT_BINS);
+      memset(entropy->dc_stats[compptr->dc_tbl_no], 0, DC_STAT_BINS);
      /* Reset DC predictions to 0 */
      entropy->last_dc_val[ci] = 0;
      entropy->dc_context[ci] = 0;
    }
    if (!cinfo->progressive_mode || cinfo->Ss) {
-      MEMZERO(entropy->ac_stats[compptr->ac_tbl_no], AC_STAT_BINS);
+      memset(entropy->ac_stats[compptr->ac_tbl_no], 0, AC_STAT_BINS);
    }
  }

@@ -471,17 +471,17 @@ decode_mcu_AC_refine(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
      if (*thiscoef) {                          /* previously nonzero coef */
        if (arith_decode(cinfo, st + 2)) {
          if (*thiscoef < 0)
-            *thiscoef += m1;
+            *thiscoef += (JCOEF)m1;
          else
-            *thiscoef += p1;
+            *thiscoef += (JCOEF)p1;
        }
        break;
      }
      if (arith_decode(cinfo, st + 1)) {        /* newly nonzero coef */
        if (arith_decode(cinfo, entropy->fixed_bin))
-          *thiscoef = m1;
+          *thiscoef = (JCOEF)m1;
        else
-          *thiscoef = p1;
+          *thiscoef = (JCOEF)p1;
        break;
      }
      st += 3;  k++;
@@ -698,8 +698,8 @@ bad:
    /* Check that the scan parameters Ss, Se, Ah/Al are OK for sequential JPEG.
     * This ought to be an error condition, but we make it a warning.
     */
-    if (cinfo->Ss != 0 || cinfo->Ah != 0 || cinfo->Al != 0 ||
-        (cinfo->Se < DCTSIZE2 && cinfo->Se != DCTSIZE2 - 1))
+    if (cinfo->Ss != 0 || cinfo->Se != DCTSIZE2 - 1 ||
+        cinfo->Ah != 0 || cinfo->Al != 0)
      WARNMS(cinfo, JWRN_NOT_SEQUENTIAL);
    /* Select MCU decoding routine */
    entropy->pub.decode_mcu = decode_mcu;
@@ -715,7 +715,7 @@ bad:
      if (entropy->dc_stats[tbl] == NULL)
        entropy->dc_stats[tbl] = (unsigned char *)(*cinfo->mem->alloc_small)
          ((j_common_ptr)cinfo, JPOOL_IMAGE, DC_STAT_BINS);
-      MEMZERO(entropy->dc_stats[tbl], DC_STAT_BINS);
+      memset(entropy->dc_stats[tbl], 0, DC_STAT_BINS);
      /* Initialize DC predictions to 0 */
      entropy->last_dc_val[ci] = 0;
      entropy->dc_context[ci] = 0;
@@ -727,7 +727,7 @@ bad:
      if (entropy->ac_stats[tbl] == NULL)
        entropy->ac_stats[tbl] = (unsigned char *)(*cinfo->mem->alloc_small)
          ((j_common_ptr)cinfo, JPOOL_IMAGE, AC_STAT_BINS);
-      MEMZERO(entropy->ac_stats[tbl], AC_STAT_BINS);
+      memset(entropy->ac_stats[tbl], 0, AC_STAT_BINS);
    }
  }


--- a/3rdparty/libjpeg-turbo/src/jdatadst.c
+++ b/3rdparty/libjpeg-turbo/src/jdatadst.c
@@ -5,7 +5,7 @@
 * Copyright (C) 1994-1996, Thomas G. Lane.
 * Modified 2009-2012 by Guido Vollbeding.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2013, 2016, D. R. Commander.
+ * Copyright (C) 2013, 2016, 2022, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@@ -23,11 +23,6 @@
 #include "jpeglib.h"
 #include "jerror.h"

-#ifndef HAVE_STDLIB_H           /* <stdlib.h> should declare malloc(),free() */
-extern void *malloc(size_t size);
-extern void free(void *ptr);
-#endif
-

 /* Expanded data destination object for stdio output */

@@ -116,7 +111,7 @@ empty_output_buffer(j_compress_ptr cinfo)
 {
  my_dest_ptr dest = (my_dest_ptr)cinfo->dest;

-  if (JFWRITE(dest->outfile, dest->buffer, OUTPUT_BUF_SIZE) !=
+  if (fwrite(dest->buffer, 1, OUTPUT_BUF_SIZE, dest->outfile) !=
      (size_t)OUTPUT_BUF_SIZE)
    ERREXIT(cinfo, JERR_FILE_WRITE);

@@ -141,7 +136,7 @@ empty_mem_output_buffer(j_compress_ptr cinfo)
  if (nextbuffer == NULL)
    ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10);

-  MEMCOPY(nextbuffer, dest->buffer, dest->bufsize);
+  memcpy(nextbuffer, dest->buffer, dest->bufsize);

  free(dest->newbuffer);

@@ -175,7 +170,7 @@ term_destination(j_compress_ptr cinfo)

  /* Write any data remaining in the buffer */
  if (datacount > 0) {
-    if (JFWRITE(dest->outfile, dest->buffer, datacount) != datacount)
+    if (fwrite(dest->buffer, 1, datacount, dest->outfile) != datacount)
      ERREXIT(cinfo, JERR_FILE_WRITE);
  }
  fflush(dest->outfile);

--- a/3rdparty/libjpeg-turbo/src/jdatasrc.c
+++ b/3rdparty/libjpeg-turbo/src/jdatasrc.c
@@ -5,7 +5,7 @@
 * Copyright (C) 1994-1996, Thomas G. Lane.
 * Modified 2009-2011 by Guido Vollbeding.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2013, 2016, D. R. Commander.
+ * Copyright (C) 2013, 2016, 2022, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@@ -104,7 +104,7 @@ fill_input_buffer(j_decompress_ptr cinfo)
  my_src_ptr src = (my_src_ptr)cinfo->src;
  size_t nbytes;

-  nbytes = JFREAD(src->infile, src->buffer, INPUT_BUF_SIZE);
+  nbytes = fread(src->buffer, 1, INPUT_BUF_SIZE, src->infile);

  if (nbytes <= 0) {
    if (src->start_of_file)     /* Treat empty input file as fatal error */

--- a/3rdparty/libjpeg-turbo/src/jddctmgr.c
+++ b/3rdparty/libjpeg-turbo/src/jddctmgr.c
@@ -6,7 +6,7 @@
 * Modified 2002-2010 by Guido Vollbeding.
 * libjpeg-turbo Modifications:
 * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
- * Copyright (C) 2010, 2015, D. R. Commander.
+ * Copyright (C) 2010, 2015, 2022, D. R. Commander.
 * Copyright (C) 2013, MIPS Technologies, Inc., California.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
@@ -345,7 +345,7 @@ jinit_inverse_dct(j_decompress_ptr cinfo)
    compptr->dct_table =
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                  sizeof(multiplier_table));
-    MEMZERO(compptr->dct_table, sizeof(multiplier_table));
+    memset(compptr->dct_table, 0, sizeof(multiplier_table));
    /* Mark multiplier table not yet set up for any method */
    idct->cur_method[ci] = -1;
  }

--- a/3rdparty/libjpeg-turbo/src/jdicc.c
+++ b/3rdparty/libjpeg-turbo/src/jdicc.c
@@ -18,10 +18,6 @@
 #include "jpeglib.h"
 #include "jerror.h"

-#ifndef HAVE_STDLIB_H           /* <stdlib.h> should declare malloc() */
-extern void *malloc(size_t size);
-#endif
-

 #define ICC_MARKER  (JPEG_APP0 + 2)     /* JPEG marker code for ICC */
 #define ICC_OVERHEAD_LEN  14            /* size of non-profile data in APP2 */

--- a/3rdparty/libjpeg-turbo/src/jdinput.c
+++ b/3rdparty/libjpeg-turbo/src/jdinput.c
@@ -4,7 +4,7 @@
 * This file was part of the Independent JPEG Group's software:
 * Copyright (C) 1991-1997, Thomas G. Lane.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2010, 2016, 2018, D. R. Commander.
+ * Copyright (C) 2010, 2016, 2018, 2022, D. R. Commander.
 * Copyright (C) 2015, Google, Inc.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
@@ -264,7 +264,7 @@ latch_quant_tables(j_decompress_ptr cinfo)
    qtbl = (JQUANT_TBL *)
      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
                                  sizeof(JQUANT_TBL));
-    MEMCOPY(qtbl, cinfo->quant_tbl_ptrs[qtblno], sizeof(JQUANT_TBL));
+    memcpy(qtbl, cinfo->quant_tbl_ptrs[qtblno], sizeof(JQUANT_TBL));
    compptr->quant_table = qtbl;
  }
 }

--- a/3rdparty/libjpeg-turbo/src/jdmarker.c
+++ b/3rdparty/libjpeg-turbo/src/jdmarker.c
@@ -4,7 +4,7 @@
 * This file was part of the Independent JPEG Group's software:
 * Copyright (C) 1991-1998, Thomas G. Lane.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2012, 2015, D. R. Commander.
+ * Copyright (C) 2012, 2015, 2022, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@@ -473,7 +473,7 @@ get_dht(j_decompress_ptr cinfo)
    for (i = 0; i < count; i++)
      INPUT_BYTE(cinfo, huffval[i], return FALSE);

-    MEMZERO(&huffval[count], (256 - count) * sizeof(UINT8));
+    memset(&huffval[count], 0, (256 - count) * sizeof(UINT8));

    length -= count;

@@ -491,8 +491,8 @@ get_dht(j_decompress_ptr cinfo)
    if (*htblptr == NULL)
      *htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo);

-    MEMCOPY((*htblptr)->bits, bits, sizeof((*htblptr)->bits));
-    MEMCOPY((*htblptr)->huffval, huffval, sizeof((*htblptr)->huffval));
+    memcpy((*htblptr)->bits, bits, sizeof((*htblptr)->bits));
+    memcpy((*htblptr)->huffval, huffval, sizeof((*htblptr)->huffval));
  }

  if (length != 0)

--- a/3rdparty/libjpeg-turbo/src/jdmaster.c
+++ b/3rdparty/libjpeg-turbo/src/jdmaster.c
@@ -5,7 +5,7 @@
 * Copyright (C) 1991-1997, Thomas G. Lane.
 * Modified 2002-2009 by Guido Vollbeding.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2009-2011, 2016, 2019, D. R. Commander.
+ * Copyright (C) 2009-2011, 2016, 2019, 2022, D. R. Commander.
 * Copyright (C) 2013, Linaro Limited.
 * Copyright (C) 2015, Google, Inc.
 * For conditions of distribution and use, see the accompanying README.ijg
@@ -417,7 +417,7 @@ prepare_range_limit_table(j_decompress_ptr cinfo)
  table += (MAXJSAMPLE + 1);    /* allow negative subscripts of simple table */
  cinfo->sample_range_limit = table;
  /* First segment of "simple" table: limit[x] = 0 for x < 0 */
-  MEMZERO(table - (MAXJSAMPLE + 1), (MAXJSAMPLE + 1) * sizeof(JSAMPLE));
+  memset(table - (MAXJSAMPLE + 1), 0, (MAXJSAMPLE + 1) * sizeof(JSAMPLE));
  /* Main part of "simple" table: limit[x] = x */
  for (i = 0; i <= MAXJSAMPLE; i++)
    table[i] = (JSAMPLE)i;
@@ -426,10 +426,10 @@ prepare_range_limit_table(j_decompress_ptr cinfo)
  for (i = CENTERJSAMPLE; i < 2 * (MAXJSAMPLE + 1); i++)
    table[i] = MAXJSAMPLE;
  /* Second half of post-IDCT table */
-  MEMZERO(table + (2 * (MAXJSAMPLE + 1)),
-          (2 * (MAXJSAMPLE + 1) - CENTERJSAMPLE) * sizeof(JSAMPLE));
-  MEMCOPY(table + (4 * (MAXJSAMPLE + 1) - CENTERJSAMPLE),
-          cinfo->sample_range_limit, CENTERJSAMPLE * sizeof(JSAMPLE));
+  memset(table + (2 * (MAXJSAMPLE + 1)), 0,
+         (2 * (MAXJSAMPLE + 1) - CENTERJSAMPLE) * sizeof(JSAMPLE));
+  memcpy(table + (4 * (MAXJSAMPLE + 1) - CENTERJSAMPLE),
+         cinfo->sample_range_limit, CENTERJSAMPLE * sizeof(JSAMPLE));
 }



--- a/3rdparty/libjpeg-turbo/src/jdphuff.c
+++ b/3rdparty/libjpeg-turbo/src/jdphuff.c
@@ -4,7 +4,7 @@
 * This file was part of the Independent JPEG Group's software:
 * Copyright (C) 1995-1997, Thomas G. Lane.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2015-2016, 2018-2021, D. R. Commander.
+ * Copyright (C) 2015-2016, 2018-2022, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@@ -578,9 +578,9 @@ decode_mcu_AC_refine(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
            if (GET_BITS(1)) {
              if ((*thiscoef & p1) == 0) { /* do nothing if already set it */
                if (*thiscoef >= 0)
-                  *thiscoef += p1;
+                  *thiscoef += (JCOEF)p1;
                else
-                  *thiscoef += m1;
+                  *thiscoef += (JCOEF)m1;
              }
            }
          } else {
@@ -612,9 +612,9 @@ decode_mcu_AC_refine(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
          if (GET_BITS(1)) {
            if ((*thiscoef & p1) == 0) { /* do nothing if already changed it */
              if (*thiscoef >= 0)
-                *thiscoef += p1;
+                *thiscoef += (JCOEF)p1;
              else
-                *thiscoef += m1;
+                *thiscoef += (JCOEF)m1;
            }
          }
        }

--- a/3rdparty/libjpeg-turbo/src/jerror.c
+++ b/3rdparty/libjpeg-turbo/src/jerror.c
@@ -3,8 +3,8 @@
 *
 * This file was part of the Independent JPEG Group's software:
 * Copyright (C) 1991-1998, Thomas G. Lane.
- * It was modified by The libjpeg-turbo Project to include only code relevant
- * to libjpeg-turbo.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@@ -189,13 +189,13 @@ format_message(j_common_ptr cinfo, char *buffer)

  /* Format the message into the passed buffer */
  if (isstring)
-    sprintf(buffer, msgtext, err->msg_parm.s);
+    snprintf(buffer, JMSG_LENGTH_MAX, msgtext, err->msg_parm.s);
  else
-    sprintf(buffer, msgtext,
-            err->msg_parm.i[0], err->msg_parm.i[1],
-            err->msg_parm.i[2], err->msg_parm.i[3],
-            err->msg_parm.i[4], err->msg_parm.i[5],
-            err->msg_parm.i[6], err->msg_parm.i[7]);
+    snprintf(buffer, JMSG_LENGTH_MAX, msgtext,
+             err->msg_parm.i[0], err->msg_parm.i[1],
+             err->msg_parm.i[2], err->msg_parm.i[3],
+             err->msg_parm.i[4], err->msg_parm.i[5],
+             err->msg_parm.i[6], err->msg_parm.i[7]);
 }



--- a/3rdparty/libjpeg-turbo/src/jerror.h
+++ b/3rdparty/libjpeg-turbo/src/jerror.h
@@ -5,7 +5,7 @@
 * Copyright (C) 1994-1997, Thomas G. Lane.
 * Modified 1997-2009 by Guido Vollbeding.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2014, 2017, D. R. Commander.
+ * Copyright (C) 2014, 2017, 2021-2022, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@@ -103,7 +103,7 @@ JMESSAGE(JERR_MISMATCHED_QUANT_TABLE,
         "Cannot transcode due to multiple use of quantization table %d")
 JMESSAGE(JERR_MISSING_DATA, "Scan script does not transmit all data")
 JMESSAGE(JERR_MODE_CHANGE, "Invalid color quantization mode change")
-JMESSAGE(JERR_NOTIMPL, "Not implemented yet")
+JMESSAGE(JERR_NOTIMPL, "Requested features are incompatible")
 JMESSAGE(JERR_NOT_COMPILED, "Requested feature was omitted at compile time")
 #if JPEG_LIB_VERSION >= 70
 JMESSAGE(JERR_NO_ARITH_TABLE, "Arithmetic table 0x%02x was not defined")
@@ -268,6 +268,7 @@ JMESSAGE(JERR_BAD_DROP_SAMPLING,
 #define ERREXITS(cinfo, code, str) \
  ((cinfo)->err->msg_code = (code), \
   strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
+   (cinfo)->err->msg_parm.s[JMSG_STR_PARM_MAX - 1] = '\0', \
   (*(cinfo)->err->error_exit) ((j_common_ptr)(cinfo)))

 #define MAKESTMT(stuff)         do { stuff } while (0)
@@ -324,6 +325,7 @@ JMESSAGE(JERR_BAD_DROP_SAMPLING,
 #define TRACEMSS(cinfo, lvl, code, str) \
  ((cinfo)->err->msg_code = (code), \
   strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
+   (cinfo)->err->msg_parm.s[JMSG_STR_PARM_MAX - 1] = '\0', \
   (*(cinfo)->err->emit_message) ((j_common_ptr)(cinfo), (lvl)))

 #endif /* JERROR_H */
--- a/3rdparty/libjpeg-turbo/src/jinclude.h
+++ b/3rdparty/libjpeg-turbo/src/jinclude.h
@@ -3,8 +3,8 @@
 *
 * This file was part of the Independent JPEG Group's software:
 * Copyright (C) 1991-1994, Thomas G. Lane.
- * It was modified by The libjpeg-turbo Project to include only code relevant
- * to libjpeg-turbo.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@@ -17,72 +17,117 @@
 * JPEG library.  Most applications need only include jpeglib.h.
 */

+#ifndef __JINCLUDE_H__
+#define __JINCLUDE_H__

 /* Include auto-config file to find out which system include files we need. */

 #include "jconfig.h"            /* auto configuration options */
+#include "jconfigint.h"
 #define JCONFIG_INCLUDED        /* so that jpeglib.h doesn't do it again */

 /*
- * We need the NULL macro and size_t typedef.
- * On an ANSI-conforming system it is sufficient to include <stddef.h>.
- * Otherwise, we get them from <stdlib.h> or <stdio.h>; we may have to
- * pull in <sys/types.h> as well.
 * Note that the core JPEG library does not require <stdio.h>;
 * only the default error handler and data source/destination modules do.
 * But we must pull it in because of the references to FILE in jpeglib.h.
 * You can remove those references if you want to compile without <stdio.h>.
 */

-#ifdef HAVE_STDDEF_H
 #include <stddef.h>
-#endif
-
-#ifdef HAVE_STDLIB_H
 #include <stdlib.h>
-#endif
-
-#ifdef NEED_SYS_TYPES_H
-#include <sys/types.h>
-#endif
-
 #include <stdio.h>
+#include <string.h>

 /*
- * We need memory copying and zeroing functions, plus strncpy().
- * ANSI and System V implementations declare these in <string.h>.
- * BSD doesn't have the mem() functions, but it does have bcopy()/bzero().
- * Some systems may declare memset and memcpy in <memory.h>.
- *
- * NOTE: we assume the size parameters to these functions are of type size_t.
- * Change the casts in these macros if not!
+ * These macros/inline functions facilitate using Microsoft's "safe string"
+ * functions with Visual Studio builds without the need to scatter #ifdefs
+ * throughout the code base.
 */

-#ifdef NEED_BSD_STRINGS

-#include <strings.h>
-#define MEMZERO(target, size) \
-  bzero((void *)(target), (size_t)(size))
-#define MEMCOPY(dest, src, size) \
-  bcopy((const void *)(src), (void *)(dest), (size_t)(size))
+#ifndef NO_GETENV

-#else /* not BSD, assume ANSI/SysV string lib */
+#ifdef _MSC_VER

-#include <string.h>
-#define MEMZERO(target, size) \
-  memset((void *)(target), 0, (size_t)(size))
-#define MEMCOPY(dest, src, size) \
-  memcpy((void *)(dest), (const void *)(src), (size_t)(size))
+static INLINE int GETENV_S(char *buffer, size_t buffer_size, const char *name)
+{
+  size_t required_size;

-#endif
+  return (int)getenv_s(&required_size, buffer, buffer_size, name);
+}

-/*
- * The modules that use fread() and fwrite() always invoke them through
- * these macros.  On some systems you may need to twiddle the argument casts.
- * CAUTION: argument order is different from underlying functions!
+#else /* _MSC_VER */
+
+#include <errno.h>
+
+/* This provides a similar interface to the Microsoft/C11 getenv_s() function,
+ * but other than parameter validation, it has no advantages over getenv().
+ */
+
+static INLINE int GETENV_S(char *buffer, size_t buffer_size, const char *name)
+{
+  char *env;
+
+  if (!buffer) {
+    if (buffer_size == 0)
+      return 0;
+    else
+      return (errno = EINVAL);
+  }
+  if (buffer_size == 0)
+    return (errno = EINVAL);
+  if (!name) {
+    *buffer = 0;
+    return 0;
+  }
+
+  env = getenv(name);
+  if (!env)
+  {
+    *buffer = 0;
+    return 0;
+  }
+
+  if (strlen(env) + 1 > buffer_size) {
+    *buffer = 0;
+    return ERANGE;
+  }
+
+  strncpy(buffer, env, buffer_size);
+
+  return 0;
+}
+
+#endif /* _MSC_VER */
+
+#endif /* NO_GETENV */
+
+
+#ifndef NO_PUTENV
+
+#ifdef _WIN32
+
+#define PUTENV_S(name, value)  _putenv_s(name, value)
+
+#else
+
+/* This provides a similar interface to the Microsoft _putenv_s() function, but
+ * other than parameter validation, it has no advantages over setenv().
 */

-#define JFREAD(file, buf, sizeofbuf) \
-  ((size_t)fread((void *)(buf), (size_t)1, (size_t)(sizeofbuf), (file)))
-#define JFWRITE(file, buf, sizeofbuf) \
-  ((size_t)fwrite((const void *)(buf), (size_t)1, (size_t)(sizeofbuf), (file)))
+static INLINE int PUTENV_S(const char *name, const char *value)
+{
+  if (!name || !value)
+    return (errno = EINVAL);
+
+  setenv(name, value, 1);
+
+  return errno;
+}
+
+#endif /* _WIN32 */
+
+#endif /* NO_PUTENV */
+
+
+#endif /* JINCLUDE_H */
--- a/3rdparty/libjpeg-turbo/src/jmemmgr.c
+++ b/3rdparty/libjpeg-turbo/src/jmemmgr.c
@@ -4,7 +4,7 @@
 * This file was part of the Independent JPEG Group's software:
 * Copyright (C) 1991-1997, Thomas G. Lane.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2016, 2021, D. R. Commander.
+ * Copyright (C) 2016, 2021-2022, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@@ -37,12 +37,6 @@
 #endif
 #include <limits.h>

-#ifndef NO_GETENV
-#ifndef HAVE_STDLIB_H           /* <stdlib.h> should declare getenv() */
-extern char *getenv(const char *name);
-#endif
-#endif
-

 LOCAL(size_t)
 round_up_pow2(size_t a, size_t b)
@@ -1162,12 +1156,16 @@ jinit_memory_mgr(j_common_ptr cinfo)
   */
 #ifndef NO_GETENV
  {
-    char *memenv;
+    char memenv[30] = { 0 };

-    if ((memenv = getenv("JPEGMEM")) != NULL) {
+    if (!GETENV_S(memenv, 30, "JPEGMEM") && strlen(memenv) > 0) {
      char ch = 'x';

+#ifdef _MSC_VER
+      if (sscanf_s(memenv, "%ld%c", &max_to_use, &ch, 1) > 0) {
+#else
      if (sscanf(memenv, "%ld%c", &max_to_use, &ch) > 0) {
+#endif
        if (ch == 'm' || ch == 'M')
          max_to_use *= 1000L;
        mem->pub.max_memory_to_use = max_to_use * 1000L;

--- a/3rdparty/libjpeg-turbo/src/jmemnobs.c
+++ b/3rdparty/libjpeg-turbo/src/jmemnobs.c
@@ -22,11 +22,6 @@
 #include "jpeglib.h"
 #include "jmemsys.h"            /* import the system-dependent declarations */

-#ifndef HAVE_STDLIB_H           /* <stdlib.h> should declare malloc(),free() */
-extern void *malloc(size_t size);
-extern void free(void *ptr);
-#endif
-

 /*
 * Memory allocation and freeing are controlled by the regular library

--- a/3rdparty/libjpeg-turbo/src/jmorecfg.h
+++ b/3rdparty/libjpeg-turbo/src/jmorecfg.h
@@ -100,11 +100,7 @@ typedef unsigned char UINT8;

 /* UINT16 must hold at least the values 0..65535. */

-#ifdef HAVE_UNSIGNED_SHORT
 typedef unsigned short UINT16;
-#else /* not HAVE_UNSIGNED_SHORT */
-typedef unsigned int UINT16;
-#endif /* HAVE_UNSIGNED_SHORT */

 /* INT16 must hold at least the values -32768..32767. */


--- a/3rdparty/libjpeg-turbo/src/jpegint.h
+++ b/3rdparty/libjpeg-turbo/src/jpegint.h
@@ -373,12 +373,3 @@ extern const int jpeg_natural_order[]; /* zigzag coef order to natural order */

 /* Arithmetic coding probability estimation tables in jaricom.c */
 extern const JLONG jpeg_aritab[];
-
-/* Suppress undefined-structure complaints if necessary. */
-
-#ifdef INCOMPLETE_TYPES_BROKEN
-#ifndef AM_MEMORY_MANAGER       /* only jmemmgr.c defines these */
-struct jvirt_sarray_control { long dummy; };
-struct jvirt_barray_control { long dummy; };
-#endif
-#endif /* INCOMPLETE_TYPES_BROKEN */
--- a/3rdparty/libjpeg-turbo/src/jstdhuff.c
+++ b/3rdparty/libjpeg-turbo/src/jstdhuff.c
@@ -4,7 +4,7 @@
 * This file was part of the Independent JPEG Group's software:
 * Copyright (C) 1991-1998, Thomas G. Lane.
 * libjpeg-turbo Modifications:
- * Copyright (C) 2013, D. R. Commander.
+ * Copyright (C) 2013, 2022, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@@ -29,7 +29,7 @@ add_huff_table(j_common_ptr cinfo, JHUFF_TBL **htblptr, const UINT8 *bits,
    return;

  /* Copy the number-of-symbols-of-each-code-length counts */
-  MEMCOPY((*htblptr)->bits, bits, sizeof((*htblptr)->bits));
+  memcpy((*htblptr)->bits, bits, sizeof((*htblptr)->bits));

  /* Validate the counts.  We do this here mainly so we can copy the right
   * number of symbols from the val[] array, without risking marching off
@@ -41,8 +41,9 @@ add_huff_table(j_common_ptr cinfo, JHUFF_TBL **htblptr, const UINT8 *bits,
  if (nsymbols < 1 || nsymbols > 256)
    ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);

-  MEMCOPY((*htblptr)->huffval, val, nsymbols * sizeof(UINT8));
-  MEMZERO(&((*htblptr)->huffval[nsymbols]), (256 - nsymbols) * sizeof(UINT8));
+  memcpy((*htblptr)->huffval, val, nsymbols * sizeof(UINT8));
+  memset(&((*htblptr)->huffval[nsymbols]), 0,
+         (256 - nsymbols) * sizeof(UINT8));

  /* Initialize sent_table FALSE so table will be written to JPEG file. */
  (*htblptr)->sent_table = FALSE;

--- a/3rdparty/libjpeg-turbo/src/jutils.c
+++ b/3rdparty/libjpeg-turbo/src/jutils.c
@@ -3,8 +3,8 @@
 *
 * This file was part of the Independent JPEG Group's software:
 * Copyright (C) 1991-1996, Thomas G. Lane.
- * It was modified by The libjpeg-turbo Project to include only code
- * relevant to libjpeg-turbo.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
 * For conditions of distribution and use, see the accompanying README.ijg
 * file.
 *
@@ -110,7 +110,7 @@ jcopy_sample_rows(JSAMPARRAY input_array, int source_row,
  for (row = num_rows; row > 0; row--) {
    inptr = *input_array++;
    outptr = *output_array++;
-    MEMCOPY(outptr, inptr, count);
+    memcpy(outptr, inptr, count);
  }
 }

@@ -120,7 +120,7 @@ jcopy_block_row(JBLOCKROW input_row, JBLOCKROW output_row,
                JDIMENSION num_blocks)
 /* Copy a row of coefficient blocks from one place to another. */
 {
-  MEMCOPY(output_row, input_row, num_blocks * (DCTSIZE2 * sizeof(JCOEF)));
+  memcpy(output_row, input_row, num_blocks * (DCTSIZE2 * sizeof(JCOEF)));
 }


@@ -129,5 +129,5 @@ jzero_far(void *target, size_t bytestozero)
 /* Zero out a chunk of memory. */
 /* This might be sample-array data, block-array data, or alloc_large data. */
 {
-  MEMZERO(target, bytestozero);
+  memset(target, 0, bytestozero);
 }
--- a/3rdparty/libjpeg-turbo/src/jversion.h.in
+++ b/3rdparty/libjpeg-turbo/src/jversion.h.in
+/*
+ * jversion.h
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-2020, Thomas G. Lane, Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2010, 2012-2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains software version identification.
+ */
+
+
+#if JPEG_LIB_VERSION >= 80
+
+#define JVERSION        "8d  15-Jan-2012"
+
+#elif JPEG_LIB_VERSION >= 70
+
+#define JVERSION        "7  27-Jun-2009"
+
+#else
+
+#define JVERSION        "6b  27-Mar-1998"
+
+#endif
+
+/*
+ * NOTE: It is our convention to place the authors in the following order:
+ * - libjpeg-turbo authors (2009-) in descending order of the date of their
+ *   most recent contribution to the project, then in ascending order of the
+ *   date of their first contribution to the project, then in alphabetical
+ *   order
+ * - Upstream authors in descending order of the date of the first inclusion of
+ *   their code
+ */
+
+#define JCOPYRIGHT \
+  "Copyright (C) 2009-2022 D. R. Commander\n" \
+  "Copyright (C) 2015, 2020 Google, Inc.\n" \
+  "Copyright (C) 2019-2020 Arm Limited\n" \
+  "Copyright (C) 2015-2016, 2018 Matthieu Darbois\n" \
+  "Copyright (C) 2011-2016 Siarhei Siamashka\n" \
+  "Copyright (C) 2015 Intel Corporation\n" \
+  "Copyright (C) 2013-2014 Linaro Limited\n" \
+  "Copyright (C) 2013-2014 MIPS Technologies, Inc.\n" \
+  "Copyright (C) 2009, 2012 Pierre Ossman for Cendio AB\n" \
+  "Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)\n" \
+  "Copyright (C) 1999-2006 MIYASAKA Masaru\n" \
+  "Copyright (C) 1991-2020 Thomas G. Lane, Guido Vollbeding"
+
+#define JCOPYRIGHT_SHORT \
+  "Copyright (C) @COPYRIGHT_YEAR@ The libjpeg-turbo Project and many others"
--- a/3rdparty/libjpeg-turbo/src/simd/CMakeLists.txt
+++ b/3rdparty/libjpeg-turbo/src/simd/CMakeLists.txt
--- a/3rdparty/libjpeg-turbo/src/simd/arm/aarch32/jccolext-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/aarch32/jccolext-neon.c
+/*
+ * jccolext-neon.c - colorspace conversion (32-bit Arm Neon)
+ *
+ * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
+ * Copyright (C) 2020, D. R. Commander.  All Rights Reserved.
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/* This file is included by jccolor-neon.c */
+
+
+/* RGB -> YCbCr conversion is defined by the following equations:
+ *    Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
+ *    Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B  + 128
+ *    Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B  + 128
+ *
+ * Avoid floating point arithmetic by using shifted integer constants:
+ *    0.29899597 = 19595 * 2^-16
+ *    0.58700561 = 38470 * 2^-16
+ *    0.11399841 =  7471 * 2^-16
+ *    0.16874695 = 11059 * 2^-16
+ *    0.33125305 = 21709 * 2^-16
+ *    0.50000000 = 32768 * 2^-16
+ *    0.41868592 = 27439 * 2^-16
+ *    0.08131409 =  5329 * 2^-16
+ * These constants are defined in jccolor-neon.c
+ *
+ * We add the fixed-point equivalent of 0.5 to Cb and Cr, which effectively
+ * rounds up or down the result via integer truncation.
+ */
+
+void jsimd_rgb_ycc_convert_neon(JDIMENSION image_width, JSAMPARRAY input_buf,
+                                JSAMPIMAGE output_buf, JDIMENSION output_row,
+                                int num_rows)
+{
+  /* Pointer to RGB(X/A) input data */
+  JSAMPROW inptr;
+  /* Pointers to Y, Cb, and Cr output data */
+  JSAMPROW outptr0, outptr1, outptr2;
+  /* Allocate temporary buffer for final (image_width % 8) pixels in row. */
+  ALIGN(16) uint8_t tmp_buf[8 * RGB_PIXELSIZE];
+
+  /* Set up conversion constants. */
+#ifdef HAVE_VLD1_U16_X2
+  const uint16x4x2_t consts = vld1_u16_x2(jsimd_rgb_ycc_neon_consts);
+#else
+  /* GCC does not currently support the intrinsic vld1_<type>_x2(). */
+  const uint16x4_t consts1 = vld1_u16(jsimd_rgb_ycc_neon_consts);
+  const uint16x4_t consts2 = vld1_u16(jsimd_rgb_ycc_neon_consts + 4);
+  const uint16x4x2_t consts = { { consts1, consts2 } };
+#endif
+  const uint32x4_t scaled_128_5 = vdupq_n_u32((128 << 16) + 32767);
+
+  while (--num_rows >= 0) {
+    inptr = *input_buf++;
+    outptr0 = output_buf[0][output_row];
+    outptr1 = output_buf[1][output_row];
+    outptr2 = output_buf[2][output_row];
+    output_row++;
+
+    int cols_remaining = image_width;
+    for (; cols_remaining > 0; cols_remaining -= 8) {
+
+      /* To prevent buffer overread by the vector load instructions, the last
+       * (image_width % 8) columns of data are first memcopied to a temporary
+       * buffer large enough to accommodate the vector load.
+       */
+      if (cols_remaining < 8) {
+        memcpy(tmp_buf, inptr, cols_remaining * RGB_PIXELSIZE);
+        inptr = tmp_buf;
+      }
+
+#if RGB_PIXELSIZE == 4
+      uint8x8x4_t input_pixels = vld4_u8(inptr);
+#else
+      uint8x8x3_t input_pixels = vld3_u8(inptr);
+#endif
+      uint16x8_t r = vmovl_u8(input_pixels.val[RGB_RED]);
+      uint16x8_t g = vmovl_u8(input_pixels.val[RGB_GREEN]);
+      uint16x8_t b = vmovl_u8(input_pixels.val[RGB_BLUE]);
+
+      /* Compute Y = 0.29900 * R + 0.58700 * G + 0.11400 * B */
+      uint32x4_t y_low = vmull_lane_u16(vget_low_u16(r), consts.val[0], 0);
+      y_low = vmlal_lane_u16(y_low, vget_low_u16(g), consts.val[0], 1);
+      y_low = vmlal_lane_u16(y_low, vget_low_u16(b), consts.val[0], 2);
+      uint32x4_t y_high = vmull_lane_u16(vget_high_u16(r), consts.val[0], 0);
+      y_high = vmlal_lane_u16(y_high, vget_high_u16(g), consts.val[0], 1);
+      y_high = vmlal_lane_u16(y_high, vget_high_u16(b), consts.val[0], 2);
+
+      /* Compute Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B  + 128 */
+      uint32x4_t cb_low = scaled_128_5;
+      cb_low = vmlsl_lane_u16(cb_low, vget_low_u16(r), consts.val[0], 3);
+      cb_low = vmlsl_lane_u16(cb_low, vget_low_u16(g), consts.val[1], 0);
+      cb_low = vmlal_lane_u16(cb_low, vget_low_u16(b), consts.val[1], 1);
+      uint32x4_t cb_high = scaled_128_5;
+      cb_high = vmlsl_lane_u16(cb_high, vget_high_u16(r), consts.val[0], 3);
+      cb_high = vmlsl_lane_u16(cb_high, vget_high_u16(g), consts.val[1], 0);
+      cb_high = vmlal_lane_u16(cb_high, vget_high_u16(b), consts.val[1], 1);
+
+      /* Compute Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B  + 128 */
+      uint32x4_t cr_low = scaled_128_5;
+      cr_low = vmlal_lane_u16(cr_low, vget_low_u16(r), consts.val[1], 1);
+      cr_low = vmlsl_lane_u16(cr_low, vget_low_u16(g), consts.val[1], 2);
+      cr_low = vmlsl_lane_u16(cr_low, vget_low_u16(b), consts.val[1], 3);
+      uint32x4_t cr_high = scaled_128_5;
+      cr_high = vmlal_lane_u16(cr_high, vget_high_u16(r), consts.val[1], 1);
+      cr_high = vmlsl_lane_u16(cr_high, vget_high_u16(g), consts.val[1], 2);
+      cr_high = vmlsl_lane_u16(cr_high, vget_high_u16(b), consts.val[1], 3);
+
+      /* Descale Y values (rounding right shift) and narrow to 16-bit. */
+      uint16x8_t y_u16 = vcombine_u16(vrshrn_n_u32(y_low, 16),
+                                      vrshrn_n_u32(y_high, 16));
+      /* Descale Cb values (right shift) and narrow to 16-bit. */
+      uint16x8_t cb_u16 = vcombine_u16(vshrn_n_u32(cb_low, 16),
+                                       vshrn_n_u32(cb_high, 16));
+      /* Descale Cr values (right shift) and narrow to 16-bit. */
+      uint16x8_t cr_u16 = vcombine_u16(vshrn_n_u32(cr_low, 16),
+                                       vshrn_n_u32(cr_high, 16));
+      /* Narrow Y, Cb, and Cr values to 8-bit and store to memory.  Buffer
+       * overwrite is permitted up to the next multiple of ALIGN_SIZE bytes.
+       */
+      vst1_u8(outptr0, vmovn_u16(y_u16));
+      vst1_u8(outptr1, vmovn_u16(cb_u16));
+      vst1_u8(outptr2, vmovn_u16(cr_u16));
+
+      /* Increment pointers. */
+      inptr += (8 * RGB_PIXELSIZE);
+      outptr0 += 8;
+      outptr1 += 8;
+      outptr2 += 8;
+    }
+  }
+}
--- a/3rdparty/libjpeg-turbo/src/simd/arm/aarch32/jchuff-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/aarch32/jchuff-neon.c
+/*
+ * jchuff-neon.c - Huffman entropy encoding (32-bit Arm Neon)
+ *
+ * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ *
+ * NOTE: All referenced figures are from
+ * Recommendation ITU-T T.81 (1992) | ISO/IEC 10918-1:1994.
+ */
+
+#define JPEG_INTERNALS
+#include "../../../jinclude.h"
+#include "../../../jpeglib.h"
+#include "../../../jsimd.h"
+#include "../../../jdct.h"
+#include "../../../jsimddct.h"
+#include "../../jsimd.h"
+#include "../jchuff.h"
+#include "neon-compat.h"
+
+#include <limits.h>
+
+#include <arm_neon.h>
+
+
+JOCTET *jsimd_huff_encode_one_block_neon(void *state, JOCTET *buffer,
+                                         JCOEFPTR block, int last_dc_val,
+                                         c_derived_tbl *dctbl,
+                                         c_derived_tbl *actbl)
+{
+  uint8_t block_nbits[DCTSIZE2];
+  uint16_t block_diff[DCTSIZE2];
+
+  /* Load rows of coefficients from DCT block in zig-zag order. */
+
+  /* Compute DC coefficient difference value. (F.1.1.5.1) */
+  int16x8_t row0 = vdupq_n_s16(block[0] - last_dc_val);
+  row0 = vld1q_lane_s16(block +  1, row0, 1);
+  row0 = vld1q_lane_s16(block +  8, row0, 2);
+  row0 = vld1q_lane_s16(block + 16, row0, 3);
+  row0 = vld1q_lane_s16(block +  9, row0, 4);
+  row0 = vld1q_lane_s16(block +  2, row0, 5);
+  row0 = vld1q_lane_s16(block +  3, row0, 6);
+  row0 = vld1q_lane_s16(block + 10, row0, 7);
+
+  int16x8_t row1 = vld1q_dup_s16(block + 17);
+  row1 = vld1q_lane_s16(block + 24, row1, 1);
+  row1 = vld1q_lane_s16(block + 32, row1, 2);
+  row1 = vld1q_lane_s16(block + 25, row1, 3);
+  row1 = vld1q_lane_s16(block + 18, row1, 4);
+  row1 = vld1q_lane_s16(block + 11, row1, 5);
+  row1 = vld1q_lane_s16(block +  4, row1, 6);
+  row1 = vld1q_lane_s16(block +  5, row1, 7);
+
+  int16x8_t row2 = vld1q_dup_s16(block + 12);
+  row2 = vld1q_lane_s16(block + 19, row2, 1);
+  row2 = vld1q_lane_s16(block + 26, row2, 2);
+  row2 = vld1q_lane_s16(block + 33, row2, 3);
+  row2 = vld1q_lane_s16(block + 40, row2, 4);
+  row2 = vld1q_lane_s16(block + 48, row2, 5);
+  row2 = vld1q_lane_s16(block + 41, row2, 6);
+  row2 = vld1q_lane_s16(block + 34, row2, 7);
+
+  int16x8_t row3 = vld1q_dup_s16(block + 27);
+  row3 = vld1q_lane_s16(block + 20, row3, 1);
+  row3 = vld1q_lane_s16(block + 13, row3, 2);
+  row3 = vld1q_lane_s16(block +  6, row3, 3);
+  row3 = vld1q_lane_s16(block +  7, row3, 4);
+  row3 = vld1q_lane_s16(block + 14, row3, 5);
+  row3 = vld1q_lane_s16(block + 21, row3, 6);
+  row3 = vld1q_lane_s16(block + 28, row3, 7);
+
+  int16x8_t abs_row0 = vabsq_s16(row0);
+  int16x8_t abs_row1 = vabsq_s16(row1);
+  int16x8_t abs_row2 = vabsq_s16(row2);
+  int16x8_t abs_row3 = vabsq_s16(row3);
+
+  int16x8_t row0_lz = vclzq_s16(abs_row0);
+  int16x8_t row1_lz = vclzq_s16(abs_row1);
+  int16x8_t row2_lz = vclzq_s16(abs_row2);
+  int16x8_t row3_lz = vclzq_s16(abs_row3);
+
+  /* Compute number of bits required to represent each coefficient. */
+  uint8x8_t row0_nbits = vsub_u8(vdup_n_u8(16),
+                                 vmovn_u16(vreinterpretq_u16_s16(row0_lz)));
+  uint8x8_t row1_nbits = vsub_u8(vdup_n_u8(16),
+                                 vmovn_u16(vreinterpretq_u16_s16(row1_lz)));
+  uint8x8_t row2_nbits = vsub_u8(vdup_n_u8(16),
+                                 vmovn_u16(vreinterpretq_u16_s16(row2_lz)));
+  uint8x8_t row3_nbits = vsub_u8(vdup_n_u8(16),
+                                 vmovn_u16(vreinterpretq_u16_s16(row3_lz)));
+
+  vst1_u8(block_nbits + 0 * DCTSIZE, row0_nbits);
+  vst1_u8(block_nbits + 1 * DCTSIZE, row1_nbits);
+  vst1_u8(block_nbits + 2 * DCTSIZE, row2_nbits);
+  vst1_u8(block_nbits + 3 * DCTSIZE, row3_nbits);
+
+  uint16x8_t row0_mask =
+    vshlq_u16(vreinterpretq_u16_s16(vshrq_n_s16(row0, 15)),
+              vnegq_s16(row0_lz));
+  uint16x8_t row1_mask =
+    vshlq_u16(vreinterpretq_u16_s16(vshrq_n_s16(row1, 15)),
+              vnegq_s16(row1_lz));
+  uint16x8_t row2_mask =
+    vshlq_u16(vreinterpretq_u16_s16(vshrq_n_s16(row2, 15)),
+              vnegq_s16(row2_lz));
+  uint16x8_t row3_mask =
+    vshlq_u16(vreinterpretq_u16_s16(vshrq_n_s16(row3, 15)),
+              vnegq_s16(row3_lz));
+
+  uint16x8_t row0_diff = veorq_u16(vreinterpretq_u16_s16(abs_row0), row0_mask);
+  uint16x8_t row1_diff = veorq_u16(vreinterpretq_u16_s16(abs_row1), row1_mask);
+  uint16x8_t row2_diff = veorq_u16(vreinterpretq_u16_s16(abs_row2), row2_mask);
+  uint16x8_t row3_diff = veorq_u16(vreinterpretq_u16_s16(abs_row3), row3_mask);
+
+  /* Store diff values for rows 0, 1, 2, and 3. */
+  vst1q_u16(block_diff + 0 * DCTSIZE, row0_diff);
+  vst1q_u16(block_diff + 1 * DCTSIZE, row1_diff);
+  vst1q_u16(block_diff + 2 * DCTSIZE, row2_diff);
+  vst1q_u16(block_diff + 3 * DCTSIZE, row3_diff);
+
+  /* Load last four rows of coefficients from DCT block in zig-zag order. */
+  int16x8_t row4 = vld1q_dup_s16(block + 35);
+  row4 = vld1q_lane_s16(block + 42, row4, 1);
+  row4 = vld1q_lane_s16(block + 49, row4, 2);
+  row4 = vld1q_lane_s16(block + 56, row4, 3);
+  row4 = vld1q_lane_s16(block + 57, row4, 4);
+  row4 = vld1q_lane_s16(block + 50, row4, 5);
+  row4 = vld1q_lane_s16(block + 43, row4, 6);
+  row4 = vld1q_lane_s16(block + 36, row4, 7);
+
+  int16x8_t row5 = vld1q_dup_s16(block + 29);
+  row5 = vld1q_lane_s16(block + 22, row5, 1);
+  row5 = vld1q_lane_s16(block + 15, row5, 2);
+  row5 = vld1q_lane_s16(block + 23, row5, 3);
+  row5 = vld1q_lane_s16(block + 30, row5, 4);
+  row5 = vld1q_lane_s16(block + 37, row5, 5);
+  row5 = vld1q_lane_s16(block + 44, row5, 6);
+  row5 = vld1q_lane_s16(block + 51, row5, 7);
+
+  int16x8_t row6 = vld1q_dup_s16(block + 58);
+  row6 = vld1q_lane_s16(block + 59, row6, 1);
+  row6 = vld1q_lane_s16(block + 52, row6, 2);
+  row6 = vld1q_lane_s16(block + 45, row6, 3);
+  row6 = vld1q_lane_s16(block + 38, row6, 4);
+  row6 = vld1q_lane_s16(block + 31, row6, 5);
+  row6 = vld1q_lane_s16(block + 39, row6, 6);
+  row6 = vld1q_lane_s16(block + 46, row6, 7);
+
+  int16x8_t row7 = vld1q_dup_s16(block + 53);
+  row7 = vld1q_lane_s16(block + 60, row7, 1);
+  row7 = vld1q_lane_s16(block + 61, row7, 2);
+  row7 = vld1q_lane_s16(block + 54, row7, 3);
+  row7 = vld1q_lane_s16(block + 47, row7, 4);
+  row7 = vld1q_lane_s16(block + 55, row7, 5);
+  row7 = vld1q_lane_s16(block + 62, row7, 6);
+  row7 = vld1q_lane_s16(block + 63, row7, 7);
+
+  int16x8_t abs_row4 = vabsq_s16(row4);
+  int16x8_t abs_row5 = vabsq_s16(row5);
+  int16x8_t abs_row6 = vabsq_s16(row6);
+  int16x8_t abs_row7 = vabsq_s16(row7);
+
+  int16x8_t row4_lz = vclzq_s16(abs_row4);
+  int16x8_t row5_lz = vclzq_s16(abs_row5);
+  int16x8_t row6_lz = vclzq_s16(abs_row6);
+  int16x8_t row7_lz = vclzq_s16(abs_row7);
+
+  /* Compute number of bits required to represent each coefficient. */
+  uint8x8_t row4_nbits = vsub_u8(vdup_n_u8(16),
+                                 vmovn_u16(vreinterpretq_u16_s16(row4_lz)));
+  uint8x8_t row5_nbits = vsub_u8(vdup_n_u8(16),
+                                 vmovn_u16(vreinterpretq_u16_s16(row5_lz)));
+  uint8x8_t row6_nbits = vsub_u8(vdup_n_u8(16),
+                                 vmovn_u16(vreinterpretq_u16_s16(row6_lz)));
+  uint8x8_t row7_nbits = vsub_u8(vdup_n_u8(16),
+                                 vmovn_u16(vreinterpretq_u16_s16(row7_lz)));
+
+  vst1_u8(block_nbits + 4 * DCTSIZE, row4_nbits);
+  vst1_u8(block_nbits + 5 * DCTSIZE, row5_nbits);
+  vst1_u8(block_nbits + 6 * DCTSIZE, row6_nbits);
+  vst1_u8(block_nbits + 7 * DCTSIZE, row7_nbits);
+
+  uint16x8_t row4_mask =
+    vshlq_u16(vreinterpretq_u16_s16(vshrq_n_s16(row4, 15)),
+              vnegq_s16(row4_lz));
+  uint16x8_t row5_mask =
+    vshlq_u16(vreinterpretq_u16_s16(vshrq_n_s16(row5, 15)),
+              vnegq_s16(row5_lz));
+  uint16x8_t row6_mask =
+    vshlq_u16(vreinterpretq_u16_s16(vshrq_n_s16(row6, 15)),
+              vnegq_s16(row6_lz));
+  uint16x8_t row7_mask =
+    vshlq_u16(vreinterpretq_u16_s16(vshrq_n_s16(row7, 15)),
+              vnegq_s16(row7_lz));
+
+  uint16x8_t row4_diff = veorq_u16(vreinterpretq_u16_s16(abs_row4), row4_mask);
+  uint16x8_t row5_diff = veorq_u16(vreinterpretq_u16_s16(abs_row5), row5_mask);
+  uint16x8_t row6_diff = veorq_u16(vreinterpretq_u16_s16(abs_row6), row6_mask);
+  uint16x8_t row7_diff = veorq_u16(vreinterpretq_u16_s16(abs_row7), row7_mask);
+
+  /* Store diff values for rows 4, 5, 6, and 7. */
+  vst1q_u16(block_diff + 4 * DCTSIZE, row4_diff);
+  vst1q_u16(block_diff + 5 * DCTSIZE, row5_diff);
+  vst1q_u16(block_diff + 6 * DCTSIZE, row6_diff);
+  vst1q_u16(block_diff + 7 * DCTSIZE, row7_diff);
+
+  /* Construct bitmap to accelerate encoding of AC coefficients.  A set bit
+   * means that the corresponding coefficient != 0.
+   */
+  uint8x8_t row0_nbits_gt0 = vcgt_u8(row0_nbits, vdup_n_u8(0));
+  uint8x8_t row1_nbits_gt0 = vcgt_u8(row1_nbits, vdup_n_u8(0));
+  uint8x8_t row2_nbits_gt0 = vcgt_u8(row2_nbits, vdup_n_u8(0));
+  uint8x8_t row3_nbits_gt0 = vcgt_u8(row3_nbits, vdup_n_u8(0));
+  uint8x8_t row4_nbits_gt0 = vcgt_u8(row4_nbits, vdup_n_u8(0));
+  uint8x8_t row5_nbits_gt0 = vcgt_u8(row5_nbits, vdup_n_u8(0));
+  uint8x8_t row6_nbits_gt0 = vcgt_u8(row6_nbits, vdup_n_u8(0));
+  uint8x8_t row7_nbits_gt0 = vcgt_u8(row7_nbits, vdup_n_u8(0));
+
+  /* { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 } */
+  const uint8x8_t bitmap_mask =
+    vreinterpret_u8_u64(vmov_n_u64(0x0102040810204080));
+
+  row0_nbits_gt0 = vand_u8(row0_nbits_gt0, bitmap_mask);
+  row1_nbits_gt0 = vand_u8(row1_nbits_gt0, bitmap_mask);
+  row2_nbits_gt0 = vand_u8(row2_nbits_gt0, bitmap_mask);
+  row3_nbits_gt0 = vand_u8(row3_nbits_gt0, bitmap_mask);
+  row4_nbits_gt0 = vand_u8(row4_nbits_gt0, bitmap_mask);
+  row5_nbits_gt0 = vand_u8(row5_nbits_gt0, bitmap_mask);
+  row6_nbits_gt0 = vand_u8(row6_nbits_gt0, bitmap_mask);
+  row7_nbits_gt0 = vand_u8(row7_nbits_gt0, bitmap_mask);
+
+  uint8x8_t bitmap_rows_10 = vpadd_u8(row1_nbits_gt0, row0_nbits_gt0);
+  uint8x8_t bitmap_rows_32 = vpadd_u8(row3_nbits_gt0, row2_nbits_gt0);
+  uint8x8_t bitmap_rows_54 = vpadd_u8(row5_nbits_gt0, row4_nbits_gt0);
+  uint8x8_t bitmap_rows_76 = vpadd_u8(row7_nbits_gt0, row6_nbits_gt0);
+  uint8x8_t bitmap_rows_3210 = vpadd_u8(bitmap_rows_32, bitmap_rows_10);
+  uint8x8_t bitmap_rows_7654 = vpadd_u8(bitmap_rows_76, bitmap_rows_54);
+  uint8x8_t bitmap = vpadd_u8(bitmap_rows_7654, bitmap_rows_3210);
+
+  /* Shift left to remove DC bit. */
+  bitmap = vreinterpret_u8_u64(vshl_n_u64(vreinterpret_u64_u8(bitmap), 1));
+  /* Move bitmap to 32-bit scalar registers. */
+  uint32_t bitmap_1_32 = vget_lane_u32(vreinterpret_u32_u8(bitmap), 1);
+  uint32_t bitmap_33_63 = vget_lane_u32(vreinterpret_u32_u8(bitmap), 0);
+
+  /* Set up state and bit buffer for output bitstream. */
+  working_state *state_ptr = (working_state *)state;
+  int free_bits = state_ptr->cur.free_bits;
+  size_t put_buffer = state_ptr->cur.put_buffer;
+
+  /* Encode DC coefficient. */
+
+  unsigned int nbits = block_nbits[0];
+  /* Emit Huffman-coded symbol and additional diff bits. */
+  unsigned int diff = block_diff[0];
+  PUT_CODE(dctbl->ehufco[nbits], dctbl->ehufsi[nbits], diff)
+
+  /* Encode AC coefficients. */
+
+  unsigned int r = 0;  /* r = run length of zeros */
+  unsigned int i = 1;  /* i = number of coefficients encoded */
+  /* Code and size information for a run length of 16 zero coefficients */
+  const unsigned int code_0xf0 = actbl->ehufco[0xf0];
+  const unsigned int size_0xf0 = actbl->ehufsi[0xf0];
+
+  while (bitmap_1_32 != 0) {
+    r = BUILTIN_CLZ(bitmap_1_32);
+    i += r;
+    bitmap_1_32 <<= r;
+    nbits = block_nbits[i];
+    diff = block_diff[i];
+    while (r > 15) {
+      /* If run length > 15, emit special run-length-16 codes. */
+      PUT_BITS(code_0xf0, size_0xf0)
+      r -= 16;
+    }
+    /* Emit Huffman symbol for run length / number of bits. (F.1.2.2.1) */
+    unsigned int rs = (r << 4) + nbits;
+    PUT_CODE(actbl->ehufco[rs], actbl->ehufsi[rs], diff)
+    i++;
+    bitmap_1_32 <<= 1;
+  }
+
+  r = 33 - i;
+  i = 33;
+
+  while (bitmap_33_63 != 0) {
+    unsigned int leading_zeros = BUILTIN_CLZ(bitmap_33_63);
+    r += leading_zeros;
+    i += leading_zeros;
+    bitmap_33_63 <<= leading_zeros;
+    nbits = block_nbits[i];
+    diff = block_diff[i];
+    while (r > 15) {
+      /* If run length > 15, emit special run-length-16 codes. */
+      PUT_BITS(code_0xf0, size_0xf0)
+      r -= 16;
+    }
+    /* Emit Huffman symbol for run length / number of bits. (F.1.2.2.1) */
+    unsigned int rs = (r << 4) + nbits;
+    PUT_CODE(actbl->ehufco[rs], actbl->ehufsi[rs], diff)
+    r = 0;
+    i++;
+    bitmap_33_63 <<= 1;
+  }
+
+  /* If the last coefficient(s) were zero, emit an end-of-block (EOB) code.
+   * The value of RS for the EOB code is 0.
+   */
+  if (i != 64) {
+    PUT_BITS(actbl->ehufco[0], actbl->ehufsi[0])
+  }
+
+  state_ptr->cur.put_buffer = put_buffer;
+  state_ptr->cur.free_bits = free_bits;
+
+  return buffer;
+}
--- a/3rdparty/libjpeg-turbo/src/simd/arm/aarch32/jsimd.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/aarch32/jsimd.c
--- a/3rdparty/libjpeg-turbo/src/simd/arm/aarch32/jsimd_neon.S
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/aarch32/jsimd_neon.S
--- a/3rdparty/libjpeg-turbo/src/simd/arm/aarch64/jccolext-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/aarch64/jccolext-neon.c
+/*
+ * jccolext-neon.c - colorspace conversion (64-bit Arm Neon)
+ *
+ * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/* This file is included by jccolor-neon.c */
+
+
+/* RGB -> YCbCr conversion is defined by the following equations:
+ *    Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
+ *    Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B  + 128
+ *    Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B  + 128
+ *
+ * Avoid floating point arithmetic by using shifted integer constants:
+ *    0.29899597 = 19595 * 2^-16
+ *    0.58700561 = 38470 * 2^-16
+ *    0.11399841 =  7471 * 2^-16
+ *    0.16874695 = 11059 * 2^-16
+ *    0.33125305 = 21709 * 2^-16
+ *    0.50000000 = 32768 * 2^-16
+ *    0.41868592 = 27439 * 2^-16
+ *    0.08131409 =  5329 * 2^-16
+ * These constants are defined in jccolor-neon.c
+ *
+ * We add the fixed-point equivalent of 0.5 to Cb and Cr, which effectively
+ * rounds up or down the result via integer truncation.
+ */
+
+void jsimd_rgb_ycc_convert_neon(JDIMENSION image_width, JSAMPARRAY input_buf,
+                                JSAMPIMAGE output_buf, JDIMENSION output_row,
+                                int num_rows)
+{
+  /* Pointer to RGB(X/A) input data */
+  JSAMPROW inptr;
+  /* Pointers to Y, Cb, and Cr output data */
+  JSAMPROW outptr0, outptr1, outptr2;
+  /* Allocate temporary buffer for final (image_width % 16) pixels in row. */
+  ALIGN(16) uint8_t tmp_buf[16 * RGB_PIXELSIZE];
+
+  /* Set up conversion constants. */
+  const uint16x8_t consts = vld1q_u16(jsimd_rgb_ycc_neon_consts);
+  const uint32x4_t scaled_128_5 = vdupq_n_u32((128 << 16) + 32767);
+
+  while (--num_rows >= 0) {
+    inptr = *input_buf++;
+    outptr0 = output_buf[0][output_row];
+    outptr1 = output_buf[1][output_row];
+    outptr2 = output_buf[2][output_row];
+    output_row++;
+
+    int cols_remaining = image_width;
+    for (; cols_remaining >= 16; cols_remaining -= 16) {
+
+#if RGB_PIXELSIZE == 4
+      uint8x16x4_t input_pixels = vld4q_u8(inptr);
+#else
+      uint8x16x3_t input_pixels = vld3q_u8(inptr);
+#endif
+      uint16x8_t r_l = vmovl_u8(vget_low_u8(input_pixels.val[RGB_RED]));
+      uint16x8_t g_l = vmovl_u8(vget_low_u8(input_pixels.val[RGB_GREEN]));
+      uint16x8_t b_l = vmovl_u8(vget_low_u8(input_pixels.val[RGB_BLUE]));
+      uint16x8_t r_h = vmovl_u8(vget_high_u8(input_pixels.val[RGB_RED]));
+      uint16x8_t g_h = vmovl_u8(vget_high_u8(input_pixels.val[RGB_GREEN]));
+      uint16x8_t b_h = vmovl_u8(vget_high_u8(input_pixels.val[RGB_BLUE]));
+
+      /* Compute Y = 0.29900 * R + 0.58700 * G + 0.11400 * B */
+      uint32x4_t y_ll = vmull_laneq_u16(vget_low_u16(r_l), consts, 0);
+      y_ll = vmlal_laneq_u16(y_ll, vget_low_u16(g_l), consts, 1);
+      y_ll = vmlal_laneq_u16(y_ll, vget_low_u16(b_l), consts, 2);
+      uint32x4_t y_lh = vmull_laneq_u16(vget_high_u16(r_l), consts, 0);
+      y_lh = vmlal_laneq_u16(y_lh, vget_high_u16(g_l), consts, 1);
+      y_lh = vmlal_laneq_u16(y_lh, vget_high_u16(b_l), consts, 2);
+      uint32x4_t y_hl = vmull_laneq_u16(vget_low_u16(r_h), consts, 0);
+      y_hl = vmlal_laneq_u16(y_hl, vget_low_u16(g_h), consts, 1);
+      y_hl = vmlal_laneq_u16(y_hl, vget_low_u16(b_h), consts, 2);
+      uint32x4_t y_hh = vmull_laneq_u16(vget_high_u16(r_h), consts, 0);
+      y_hh = vmlal_laneq_u16(y_hh, vget_high_u16(g_h), consts, 1);
+      y_hh = vmlal_laneq_u16(y_hh, vget_high_u16(b_h), consts, 2);
+
+      /* Compute Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B  + 128 */
+      uint32x4_t cb_ll = scaled_128_5;
+      cb_ll = vmlsl_laneq_u16(cb_ll, vget_low_u16(r_l), consts, 3);
+      cb_ll = vmlsl_laneq_u16(cb_ll, vget_low_u16(g_l), consts, 4);
+      cb_ll = vmlal_laneq_u16(cb_ll, vget_low_u16(b_l), consts, 5);
+      uint32x4_t cb_lh = scaled_128_5;
+      cb_lh = vmlsl_laneq_u16(cb_lh, vget_high_u16(r_l), consts, 3);
+      cb_lh = vmlsl_laneq_u16(cb_lh, vget_high_u16(g_l), consts, 4);
+      cb_lh = vmlal_laneq_u16(cb_lh, vget_high_u16(b_l), consts, 5);
+      uint32x4_t cb_hl = scaled_128_5;
+      cb_hl = vmlsl_laneq_u16(cb_hl, vget_low_u16(r_h), consts, 3);
+      cb_hl = vmlsl_laneq_u16(cb_hl, vget_low_u16(g_h), consts, 4);
+      cb_hl = vmlal_laneq_u16(cb_hl, vget_low_u16(b_h), consts, 5);
+      uint32x4_t cb_hh = scaled_128_5;
+      cb_hh = vmlsl_laneq_u16(cb_hh, vget_high_u16(r_h), consts, 3);
+      cb_hh = vmlsl_laneq_u16(cb_hh, vget_high_u16(g_h), consts, 4);
+      cb_hh = vmlal_laneq_u16(cb_hh, vget_high_u16(b_h), consts, 5);
+
+      /* Compute Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B  + 128 */
+      uint32x4_t cr_ll = scaled_128_5;
+      cr_ll = vmlal_laneq_u16(cr_ll, vget_low_u16(r_l), consts, 5);
+      cr_ll = vmlsl_laneq_u16(cr_ll, vget_low_u16(g_l), consts, 6);
+      cr_ll = vmlsl_laneq_u16(cr_ll, vget_low_u16(b_l), consts, 7);
+      uint32x4_t cr_lh = scaled_128_5;
+      cr_lh = vmlal_laneq_u16(cr_lh, vget_high_u16(r_l), consts, 5);
+      cr_lh = vmlsl_laneq_u16(cr_lh, vget_high_u16(g_l), consts, 6);
+      cr_lh = vmlsl_laneq_u16(cr_lh, vget_high_u16(b_l), consts, 7);
+      uint32x4_t cr_hl = scaled_128_5;
+      cr_hl = vmlal_laneq_u16(cr_hl, vget_low_u16(r_h), consts, 5);
+      cr_hl = vmlsl_laneq_u16(cr_hl, vget_low_u16(g_h), consts, 6);
+      cr_hl = vmlsl_laneq_u16(cr_hl, vget_low_u16(b_h), consts, 7);
+      uint32x4_t cr_hh = scaled_128_5;
+      cr_hh = vmlal_laneq_u16(cr_hh, vget_high_u16(r_h), consts, 5);
+      cr_hh = vmlsl_laneq_u16(cr_hh, vget_high_u16(g_h), consts, 6);
+      cr_hh = vmlsl_laneq_u16(cr_hh, vget_high_u16(b_h), consts, 7);
+
+      /* Descale Y values (rounding right shift) and narrow to 16-bit. */
+      uint16x8_t y_l = vcombine_u16(vrshrn_n_u32(y_ll, 16),
+                                    vrshrn_n_u32(y_lh, 16));
+      uint16x8_t y_h = vcombine_u16(vrshrn_n_u32(y_hl, 16),
+                                    vrshrn_n_u32(y_hh, 16));
+      /* Descale Cb values (right shift) and narrow to 16-bit. */
+      uint16x8_t cb_l = vcombine_u16(vshrn_n_u32(cb_ll, 16),
+                                     vshrn_n_u32(cb_lh, 16));
+      uint16x8_t cb_h = vcombine_u16(vshrn_n_u32(cb_hl, 16),
+                                     vshrn_n_u32(cb_hh, 16));
+      /* Descale Cr values (right shift) and narrow to 16-bit. */
+      uint16x8_t cr_l = vcombine_u16(vshrn_n_u32(cr_ll, 16),
+                                     vshrn_n_u32(cr_lh, 16));
+      uint16x8_t cr_h = vcombine_u16(vshrn_n_u32(cr_hl, 16),
+                                     vshrn_n_u32(cr_hh, 16));
+      /* Narrow Y, Cb, and Cr values to 8-bit and store to memory.  Buffer
+       * overwrite is permitted up to the next multiple of ALIGN_SIZE bytes.
+       */
+      vst1q_u8(outptr0, vcombine_u8(vmovn_u16(y_l), vmovn_u16(y_h)));
+      vst1q_u8(outptr1, vcombine_u8(vmovn_u16(cb_l), vmovn_u16(cb_h)));
+      vst1q_u8(outptr2, vcombine_u8(vmovn_u16(cr_l), vmovn_u16(cr_h)));
+
+      /* Increment pointers. */
+      inptr += (16 * RGB_PIXELSIZE);
+      outptr0 += 16;
+      outptr1 += 16;
+      outptr2 += 16;
+    }
+
+    if (cols_remaining > 8) {
+      /* To prevent buffer overread by the vector load instructions, the last
+       * (image_width % 16) columns of data are first memcopied to a temporary
+       * buffer large enough to accommodate the vector load.
+       */
+      memcpy(tmp_buf, inptr, cols_remaining * RGB_PIXELSIZE);
+      inptr = tmp_buf;
+
+#if RGB_PIXELSIZE == 4
+      uint8x16x4_t input_pixels = vld4q_u8(inptr);
+#else
+      uint8x16x3_t input_pixels = vld3q_u8(inptr);
+#endif
+      uint16x8_t r_l = vmovl_u8(vget_low_u8(input_pixels.val[RGB_RED]));
+      uint16x8_t g_l = vmovl_u8(vget_low_u8(input_pixels.val[RGB_GREEN]));
+      uint16x8_t b_l = vmovl_u8(vget_low_u8(input_pixels.val[RGB_BLUE]));
+      uint16x8_t r_h = vmovl_u8(vget_high_u8(input_pixels.val[RGB_RED]));
+      uint16x8_t g_h = vmovl_u8(vget_high_u8(input_pixels.val[RGB_GREEN]));
+      uint16x8_t b_h = vmovl_u8(vget_high_u8(input_pixels.val[RGB_BLUE]));
+
+      /* Compute Y = 0.29900 * R + 0.58700 * G + 0.11400 * B */
+      uint32x4_t y_ll = vmull_laneq_u16(vget_low_u16(r_l), consts, 0);
+      y_ll = vmlal_laneq_u16(y_ll, vget_low_u16(g_l), consts, 1);
+      y_ll = vmlal_laneq_u16(y_ll, vget_low_u16(b_l), consts, 2);
+      uint32x4_t y_lh = vmull_laneq_u16(vget_high_u16(r_l), consts, 0);
+      y_lh = vmlal_laneq_u16(y_lh, vget_high_u16(g_l), consts, 1);
+      y_lh = vmlal_laneq_u16(y_lh, vget_high_u16(b_l), consts, 2);
+      uint32x4_t y_hl = vmull_laneq_u16(vget_low_u16(r_h), consts, 0);
+      y_hl = vmlal_laneq_u16(y_hl, vget_low_u16(g_h), consts, 1);
+      y_hl = vmlal_laneq_u16(y_hl, vget_low_u16(b_h), consts, 2);
+      uint32x4_t y_hh = vmull_laneq_u16(vget_high_u16(r_h), consts, 0);
+      y_hh = vmlal_laneq_u16(y_hh, vget_high_u16(g_h), consts, 1);
+      y_hh = vmlal_laneq_u16(y_hh, vget_high_u16(b_h), consts, 2);
+
+      /* Compute Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B  + 128 */
+      uint32x4_t cb_ll = scaled_128_5;
+      cb_ll = vmlsl_laneq_u16(cb_ll, vget_low_u16(r_l), consts, 3);
+      cb_ll = vmlsl_laneq_u16(cb_ll, vget_low_u16(g_l), consts, 4);
+      cb_ll = vmlal_laneq_u16(cb_ll, vget_low_u16(b_l), consts, 5);
+      uint32x4_t cb_lh = scaled_128_5;
+      cb_lh = vmlsl_laneq_u16(cb_lh, vget_high_u16(r_l), consts, 3);
+      cb_lh = vmlsl_laneq_u16(cb_lh, vget_high_u16(g_l), consts, 4);
+      cb_lh = vmlal_laneq_u16(cb_lh, vget_high_u16(b_l), consts, 5);
+      uint32x4_t cb_hl = scaled_128_5;
+      cb_hl = vmlsl_laneq_u16(cb_hl, vget_low_u16(r_h), consts, 3);
+      cb_hl = vmlsl_laneq_u16(cb_hl, vget_low_u16(g_h), consts, 4);
+      cb_hl = vmlal_laneq_u16(cb_hl, vget_low_u16(b_h), consts, 5);
+      uint32x4_t cb_hh = scaled_128_5;
+      cb_hh = vmlsl_laneq_u16(cb_hh, vget_high_u16(r_h), consts, 3);
+      cb_hh = vmlsl_laneq_u16(cb_hh, vget_high_u16(g_h), consts, 4);
+      cb_hh = vmlal_laneq_u16(cb_hh, vget_high_u16(b_h), consts, 5);
+
+      /* Compute Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B  + 128 */
+      uint32x4_t cr_ll = scaled_128_5;
+      cr_ll = vmlal_laneq_u16(cr_ll, vget_low_u16(r_l), consts, 5);
+      cr_ll = vmlsl_laneq_u16(cr_ll, vget_low_u16(g_l), consts, 6);
+      cr_ll = vmlsl_laneq_u16(cr_ll, vget_low_u16(b_l), consts, 7);
+      uint32x4_t cr_lh = scaled_128_5;
+      cr_lh = vmlal_laneq_u16(cr_lh, vget_high_u16(r_l), consts, 5);
+      cr_lh = vmlsl_laneq_u16(cr_lh, vget_high_u16(g_l), consts, 6);
+      cr_lh = vmlsl_laneq_u16(cr_lh, vget_high_u16(b_l), consts, 7);
+      uint32x4_t cr_hl = scaled_128_5;
+      cr_hl = vmlal_laneq_u16(cr_hl, vget_low_u16(r_h), consts, 5);
+      cr_hl = vmlsl_laneq_u16(cr_hl, vget_low_u16(g_h), consts, 6);
+      cr_hl = vmlsl_laneq_u16(cr_hl, vget_low_u16(b_h), consts, 7);
+      uint32x4_t cr_hh = scaled_128_5;
+      cr_hh = vmlal_laneq_u16(cr_hh, vget_high_u16(r_h), consts, 5);
+      cr_hh = vmlsl_laneq_u16(cr_hh, vget_high_u16(g_h), consts, 6);
+      cr_hh = vmlsl_laneq_u16(cr_hh, vget_high_u16(b_h), consts, 7);
+
+      /* Descale Y values (rounding right shift) and narrow to 16-bit. */
+      uint16x8_t y_l = vcombine_u16(vrshrn_n_u32(y_ll, 16),
+                                    vrshrn_n_u32(y_lh, 16));
+      uint16x8_t y_h = vcombine_u16(vrshrn_n_u32(y_hl, 16),
+                                    vrshrn_n_u32(y_hh, 16));
+      /* Descale Cb values (right shift) and narrow to 16-bit. */
+      uint16x8_t cb_l = vcombine_u16(vshrn_n_u32(cb_ll, 16),
+                                     vshrn_n_u32(cb_lh, 16));
+      uint16x8_t cb_h = vcombine_u16(vshrn_n_u32(cb_hl, 16),
+                                     vshrn_n_u32(cb_hh, 16));
+      /* Descale Cr values (right shift) and narrow to 16-bit. */
+      uint16x8_t cr_l = vcombine_u16(vshrn_n_u32(cr_ll, 16),
+                                     vshrn_n_u32(cr_lh, 16));
+      uint16x8_t cr_h = vcombine_u16(vshrn_n_u32(cr_hl, 16),
+                                     vshrn_n_u32(cr_hh, 16));
+      /* Narrow Y, Cb, and Cr values to 8-bit and store to memory.  Buffer
+       * overwrite is permitted up to the next multiple of ALIGN_SIZE bytes.
+       */
+      vst1q_u8(outptr0, vcombine_u8(vmovn_u16(y_l), vmovn_u16(y_h)));
+      vst1q_u8(outptr1, vcombine_u8(vmovn_u16(cb_l), vmovn_u16(cb_h)));
+      vst1q_u8(outptr2, vcombine_u8(vmovn_u16(cr_l), vmovn_u16(cr_h)));
+
+    } else if (cols_remaining > 0) {
+      /* To prevent buffer overread by the vector load instructions, the last
+       * (image_width % 8) columns of data are first memcopied to a temporary
+       * buffer large enough to accommodate the vector load.
+       */
+      memcpy(tmp_buf, inptr, cols_remaining * RGB_PIXELSIZE);
+      inptr = tmp_buf;
+
+#if RGB_PIXELSIZE == 4
+      uint8x8x4_t input_pixels = vld4_u8(inptr);
+#else
+      uint8x8x3_t input_pixels = vld3_u8(inptr);
+#endif
+      uint16x8_t r = vmovl_u8(input_pixels.val[RGB_RED]);
+      uint16x8_t g = vmovl_u8(input_pixels.val[RGB_GREEN]);
+      uint16x8_t b = vmovl_u8(input_pixels.val[RGB_BLUE]);
+
+      /* Compute Y = 0.29900 * R + 0.58700 * G + 0.11400 * B */
+      uint32x4_t y_l = vmull_laneq_u16(vget_low_u16(r), consts, 0);
+      y_l = vmlal_laneq_u16(y_l, vget_low_u16(g), consts, 1);
+      y_l = vmlal_laneq_u16(y_l, vget_low_u16(b), consts, 2);
+      uint32x4_t y_h = vmull_laneq_u16(vget_high_u16(r), consts, 0);
+      y_h = vmlal_laneq_u16(y_h, vget_high_u16(g), consts, 1);
+      y_h = vmlal_laneq_u16(y_h, vget_high_u16(b), consts, 2);
+
+      /* Compute Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B  + 128 */
+      uint32x4_t cb_l = scaled_128_5;
+      cb_l = vmlsl_laneq_u16(cb_l, vget_low_u16(r), consts, 3);
+      cb_l = vmlsl_laneq_u16(cb_l, vget_low_u16(g), consts, 4);
+      cb_l = vmlal_laneq_u16(cb_l, vget_low_u16(b), consts, 5);
+      uint32x4_t cb_h = scaled_128_5;
+      cb_h = vmlsl_laneq_u16(cb_h, vget_high_u16(r), consts, 3);
+      cb_h = vmlsl_laneq_u16(cb_h, vget_high_u16(g), consts, 4);
+      cb_h = vmlal_laneq_u16(cb_h, vget_high_u16(b), consts, 5);
+
+      /* Compute Cr = 0.50000 * R - 0.41869 * G - 0.08131 * B  + 128 */
+      uint32x4_t cr_l = scaled_128_5;
+      cr_l = vmlal_laneq_u16(cr_l, vget_low_u16(r), consts, 5);
+      cr_l = vmlsl_laneq_u16(cr_l, vget_low_u16(g), consts, 6);
+      cr_l = vmlsl_laneq_u16(cr_l, vget_low_u16(b), consts, 7);
+      uint32x4_t cr_h = scaled_128_5;
+      cr_h = vmlal_laneq_u16(cr_h, vget_high_u16(r), consts, 5);
+      cr_h = vmlsl_laneq_u16(cr_h, vget_high_u16(g), consts, 6);
+      cr_h = vmlsl_laneq_u16(cr_h, vget_high_u16(b), consts, 7);
+
+      /* Descale Y values (rounding right shift) and narrow to 16-bit. */
+      uint16x8_t y_u16 = vcombine_u16(vrshrn_n_u32(y_l, 16),
+                                      vrshrn_n_u32(y_h, 16));
+      /* Descale Cb values (right shift) and narrow to 16-bit. */
+      uint16x8_t cb_u16 = vcombine_u16(vshrn_n_u32(cb_l, 16),
+                                       vshrn_n_u32(cb_h, 16));
+      /* Descale Cr values (right shift) and narrow to 16-bit. */
+      uint16x8_t cr_u16 = vcombine_u16(vshrn_n_u32(cr_l, 16),
+                                       vshrn_n_u32(cr_h, 16));
+      /* Narrow Y, Cb, and Cr values to 8-bit and store to memory.  Buffer
+       * overwrite is permitted up to the next multiple of ALIGN_SIZE bytes.
+       */
+      vst1_u8(outptr0, vmovn_u16(y_u16));
+      vst1_u8(outptr1, vmovn_u16(cb_u16));
+      vst1_u8(outptr2, vmovn_u16(cr_u16));
+    }
+  }
+}
--- a/3rdparty/libjpeg-turbo/src/simd/arm/aarch64/jchuff-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/aarch64/jchuff-neon.c
+/*
+ * jchuff-neon.c - Huffman entropy encoding (64-bit Arm Neon)
+ *
+ * Copyright (C) 2020-2021, Arm Limited.  All Rights Reserved.
+ * Copyright (C) 2020, 2022, D. R. Commander.  All Rights Reserved.
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ *
+ * NOTE: All referenced figures are from
+ * Recommendation ITU-T T.81 (1992) | ISO/IEC 10918-1:1994.
+ */
+
+#define JPEG_INTERNALS
+#include "../../../jinclude.h"
+#include "../../../jpeglib.h"
+#include "../../../jsimd.h"
+#include "../../../jdct.h"
+#include "../../../jsimddct.h"
+#include "../../jsimd.h"
+#include "../align.h"
+#include "../jchuff.h"
+#include "neon-compat.h"
+
+#include <limits.h>
+
+#include <arm_neon.h>
+
+
+ALIGN(16) static const uint8_t jsimd_huff_encode_one_block_consts[] = {
+    0,   1,   2,   3,  16,  17,  32,  33,
+   18,  19,   4,   5,   6,   7,  20,  21,
+   34,  35,  48,  49, 255, 255,  50,  51,
+   36,  37,  22,  23,   8,   9,  10,  11,
+  255, 255,   6,   7,  20,  21,  34,  35,
+   48,  49, 255, 255,  50,  51,  36,  37,
+   54,  55,  40,  41,  26,  27,  12,  13,
+   14,  15,  28,  29,  42,  43,  56,  57,
+    6,   7,  20,  21,  34,  35,  48,  49,
+   50,  51,  36,  37,  22,  23,   8,   9,
+   26,  27,  12,  13, 255, 255,  14,  15,
+   28,  29,  42,  43,  56,  57, 255, 255,
+   52,  53,  54,  55,  40,  41,  26,  27,
+   12,  13, 255, 255,  14,  15,  28,  29,
+   26,  27,  40,  41,  42,  43,  28,  29,
+   14,  15,  30,  31,  44,  45,  46,  47
+};
+
+/* The AArch64 implementation of the FLUSH() macro triggers a UBSan misaligned
+ * address warning because the macro sometimes writes a 64-bit value to a
+ * non-64-bit-aligned address.  That behavior is technically undefined per
+ * the C specification, but it is supported by the AArch64 architecture and
+ * compilers.
+ */
+#if defined(__has_feature)
+#if __has_feature(undefined_behavior_sanitizer)
+__attribute__((no_sanitize("alignment")))
+#endif
+#endif
+JOCTET *jsimd_huff_encode_one_block_neon(void *state, JOCTET *buffer,
+                                         JCOEFPTR block, int last_dc_val,
+                                         c_derived_tbl *dctbl,
+                                         c_derived_tbl *actbl)
+{
+  uint16_t block_diff[DCTSIZE2];
+
+  /* Load lookup table indices for rows of zig-zag ordering. */
+#ifdef HAVE_VLD1Q_U8_X4
+  const uint8x16x4_t idx_rows_0123 =
+    vld1q_u8_x4(jsimd_huff_encode_one_block_consts + 0 * DCTSIZE);
+  const uint8x16x4_t idx_rows_4567 =
+    vld1q_u8_x4(jsimd_huff_encode_one_block_consts + 8 * DCTSIZE);
+#else
+  /* GCC does not currently support intrinsics vl1dq_<type>_x4(). */
+  const uint8x16x4_t idx_rows_0123 = { {
+    vld1q_u8(jsimd_huff_encode_one_block_consts + 0 * DCTSIZE),
+    vld1q_u8(jsimd_huff_encode_one_block_consts + 2 * DCTSIZE),
+    vld1q_u8(jsimd_huff_encode_one_block_consts + 4 * DCTSIZE),
+    vld1q_u8(jsimd_huff_encode_one_block_consts + 6 * DCTSIZE)
+  } };
+  const uint8x16x4_t idx_rows_4567 = { {
+    vld1q_u8(jsimd_huff_encode_one_block_consts + 8 * DCTSIZE),
+    vld1q_u8(jsimd_huff_encode_one_block_consts + 10 * DCTSIZE),
+    vld1q_u8(jsimd_huff_encode_one_block_consts + 12 * DCTSIZE),
+    vld1q_u8(jsimd_huff_encode_one_block_consts + 14 * DCTSIZE)
+  } };
+#endif
+
+  /* Load 8x8 block of DCT coefficients. */
+#ifdef HAVE_VLD1Q_U8_X4
+  const int8x16x4_t tbl_rows_0123 =
+    vld1q_s8_x4((int8_t *)(block + 0 * DCTSIZE));
+  const int8x16x4_t tbl_rows_4567 =
+    vld1q_s8_x4((int8_t *)(block + 4 * DCTSIZE));
+#else
+  const int8x16x4_t tbl_rows_0123 = { {
+    vld1q_s8((int8_t *)(block + 0 * DCTSIZE)),
+    vld1q_s8((int8_t *)(block + 1 * DCTSIZE)),
+    vld1q_s8((int8_t *)(block + 2 * DCTSIZE)),
+    vld1q_s8((int8_t *)(block + 3 * DCTSIZE))
+  } };
+  const int8x16x4_t tbl_rows_4567 = { {
+    vld1q_s8((int8_t *)(block + 4 * DCTSIZE)),
+    vld1q_s8((int8_t *)(block + 5 * DCTSIZE)),
+    vld1q_s8((int8_t *)(block + 6 * DCTSIZE)),
+    vld1q_s8((int8_t *)(block + 7 * DCTSIZE))
+  } };
+#endif
+
+  /* Initialise extra lookup tables. */
+  const int8x16x4_t tbl_rows_2345 = { {
+    tbl_rows_0123.val[2], tbl_rows_0123.val[3],
+    tbl_rows_4567.val[0], tbl_rows_4567.val[1]
+  } };
+  const int8x16x3_t tbl_rows_567 =
+    { { tbl_rows_4567.val[1], tbl_rows_4567.val[2], tbl_rows_4567.val[3] } };
+
+  /* Shuffle coefficients into zig-zag order. */
+  int16x8_t row0 =
+    vreinterpretq_s16_s8(vqtbl4q_s8(tbl_rows_0123, idx_rows_0123.val[0]));
+  int16x8_t row1 =
+    vreinterpretq_s16_s8(vqtbl4q_s8(tbl_rows_0123, idx_rows_0123.val[1]));
+  int16x8_t row2 =
+    vreinterpretq_s16_s8(vqtbl4q_s8(tbl_rows_2345, idx_rows_0123.val[2]));
+  int16x8_t row3 =
+    vreinterpretq_s16_s8(vqtbl4q_s8(tbl_rows_0123, idx_rows_0123.val[3]));
+  int16x8_t row4 =
+    vreinterpretq_s16_s8(vqtbl4q_s8(tbl_rows_4567, idx_rows_4567.val[0]));
+  int16x8_t row5 =
+    vreinterpretq_s16_s8(vqtbl4q_s8(tbl_rows_2345, idx_rows_4567.val[1]));
+  int16x8_t row6 =
+    vreinterpretq_s16_s8(vqtbl4q_s8(tbl_rows_4567, idx_rows_4567.val[2]));
+  int16x8_t row7 =
+    vreinterpretq_s16_s8(vqtbl3q_s8(tbl_rows_567, idx_rows_4567.val[3]));
+
+  /* Compute DC coefficient difference value (F.1.1.5.1). */
+  row0 = vsetq_lane_s16(block[0] - last_dc_val, row0, 0);
+  /* Initialize AC coefficient lanes not reachable by lookup tables. */
+  row1 =
+    vsetq_lane_s16(vgetq_lane_s16(vreinterpretq_s16_s8(tbl_rows_4567.val[0]),
+                                  0), row1, 2);
+  row2 =
+    vsetq_lane_s16(vgetq_lane_s16(vreinterpretq_s16_s8(tbl_rows_0123.val[1]),
+                                  4), row2, 0);
+  row2 =
+    vsetq_lane_s16(vgetq_lane_s16(vreinterpretq_s16_s8(tbl_rows_4567.val[2]),
+                                  0), row2, 5);
+  row5 =
+    vsetq_lane_s16(vgetq_lane_s16(vreinterpretq_s16_s8(tbl_rows_0123.val[1]),
+                                  7), row5, 2);
+  row5 =
+    vsetq_lane_s16(vgetq_lane_s16(vreinterpretq_s16_s8(tbl_rows_4567.val[2]),
+                                  3), row5, 7);
+  row6 =
+    vsetq_lane_s16(vgetq_lane_s16(vreinterpretq_s16_s8(tbl_rows_0123.val[3]),
+                                  7), row6, 5);
+
+  /* DCT block is now in zig-zag order; start Huffman encoding process. */
+
+  /* Construct bitmap to accelerate encoding of AC coefficients.  A set bit
+   * means that the corresponding coefficient != 0.
+   */
+  uint16x8_t row0_ne_0 = vtstq_s16(row0, row0);
+  uint16x8_t row1_ne_0 = vtstq_s16(row1, row1);
+  uint16x8_t row2_ne_0 = vtstq_s16(row2, row2);
+  uint16x8_t row3_ne_0 = vtstq_s16(row3, row3);
+  uint16x8_t row4_ne_0 = vtstq_s16(row4, row4);
+  uint16x8_t row5_ne_0 = vtstq_s16(row5, row5);
+  uint16x8_t row6_ne_0 = vtstq_s16(row6, row6);
+  uint16x8_t row7_ne_0 = vtstq_s16(row7, row7);
+
+  uint8x16_t row10_ne_0 = vuzp1q_u8(vreinterpretq_u8_u16(row1_ne_0),
+                                    vreinterpretq_u8_u16(row0_ne_0));
+  uint8x16_t row32_ne_0 = vuzp1q_u8(vreinterpretq_u8_u16(row3_ne_0),
+                                    vreinterpretq_u8_u16(row2_ne_0));
+  uint8x16_t row54_ne_0 = vuzp1q_u8(vreinterpretq_u8_u16(row5_ne_0),
+                                    vreinterpretq_u8_u16(row4_ne_0));
+  uint8x16_t row76_ne_0 = vuzp1q_u8(vreinterpretq_u8_u16(row7_ne_0),
+                                    vreinterpretq_u8_u16(row6_ne_0));
+
+  /* { 0x80, 0x40, 0x20, 0x10, 0x08, 0x04, 0x02, 0x01 } */
+  const uint8x16_t bitmap_mask =
+    vreinterpretq_u8_u64(vdupq_n_u64(0x0102040810204080));
+
+  uint8x16_t bitmap_rows_10 = vandq_u8(row10_ne_0, bitmap_mask);
+  uint8x16_t bitmap_rows_32 = vandq_u8(row32_ne_0, bitmap_mask);
+  uint8x16_t bitmap_rows_54 = vandq_u8(row54_ne_0, bitmap_mask);
+  uint8x16_t bitmap_rows_76 = vandq_u8(row76_ne_0, bitmap_mask);
+
+  uint8x16_t bitmap_rows_3210 = vpaddq_u8(bitmap_rows_32, bitmap_rows_10);
+  uint8x16_t bitmap_rows_7654 = vpaddq_u8(bitmap_rows_76, bitmap_rows_54);
+  uint8x16_t bitmap_rows_76543210 = vpaddq_u8(bitmap_rows_7654,
+                                              bitmap_rows_3210);
+  uint8x8_t bitmap_all = vpadd_u8(vget_low_u8(bitmap_rows_76543210),
+                                  vget_high_u8(bitmap_rows_76543210));
+
+  /* Shift left to remove DC bit. */
+  bitmap_all =
+    vreinterpret_u8_u64(vshl_n_u64(vreinterpret_u64_u8(bitmap_all), 1));
+  /* Count bits set (number of non-zero coefficients) in bitmap. */
+  unsigned int non_zero_coefficients = vaddv_u8(vcnt_u8(bitmap_all));
+  /* Move bitmap to 64-bit scalar register. */
+  uint64_t bitmap = vget_lane_u64(vreinterpret_u64_u8(bitmap_all), 0);
+
+  /* Set up state and bit buffer for output bitstream. */
+  working_state *state_ptr = (working_state *)state;
+  int free_bits = state_ptr->cur.free_bits;
+  size_t put_buffer = state_ptr->cur.put_buffer;
+
+  /* Encode DC coefficient. */
+
+  /* For negative coeffs: diff = abs(coeff) -1 = ~abs(coeff) */
+  int16x8_t abs_row0 = vabsq_s16(row0);
+  int16x8_t row0_lz = vclzq_s16(abs_row0);
+  uint16x8_t row0_mask = vshlq_u16(vcltzq_s16(row0), vnegq_s16(row0_lz));
+  uint16x8_t row0_diff = veorq_u16(vreinterpretq_u16_s16(abs_row0), row0_mask);
+  /* Find nbits required to specify sign and amplitude of coefficient. */
+  unsigned int lz = vgetq_lane_u16(vreinterpretq_u16_s16(row0_lz), 0);
+  unsigned int nbits = 16 - lz;
+  /* Emit Huffman-coded symbol and additional diff bits. */
+  unsigned int diff = vgetq_lane_u16(row0_diff, 0);
+  PUT_CODE(dctbl->ehufco[nbits], dctbl->ehufsi[nbits], diff)
+
+  /* Encode AC coefficients. */
+
+  unsigned int r = 0;  /* r = run length of zeros */
+  unsigned int i = 1;  /* i = number of coefficients encoded */
+  /* Code and size information for a run length of 16 zero coefficients */
+  const unsigned int code_0xf0 = actbl->ehufco[0xf0];
+  const unsigned int size_0xf0 = actbl->ehufsi[0xf0];
+
+  /* The most efficient method of computing nbits and diff depends on the
+   * number of non-zero coefficients.  If the bitmap is not too sparse (> 8
+   * non-zero AC coefficients), it is beneficial to do all of the work using
+   * Neon; else we do some of the work using Neon and the rest on demand using
+   * scalar code.
+   */
+  if (non_zero_coefficients > 8) {
+    uint8_t block_nbits[DCTSIZE2];
+
+    int16x8_t abs_row1 = vabsq_s16(row1);
+    int16x8_t abs_row2 = vabsq_s16(row2);
+    int16x8_t abs_row3 = vabsq_s16(row3);
+    int16x8_t abs_row4 = vabsq_s16(row4);
+    int16x8_t abs_row5 = vabsq_s16(row5);
+    int16x8_t abs_row6 = vabsq_s16(row6);
+    int16x8_t abs_row7 = vabsq_s16(row7);
+    int16x8_t row1_lz = vclzq_s16(abs_row1);
+    int16x8_t row2_lz = vclzq_s16(abs_row2);
+    int16x8_t row3_lz = vclzq_s16(abs_row3);
+    int16x8_t row4_lz = vclzq_s16(abs_row4);
+    int16x8_t row5_lz = vclzq_s16(abs_row5);
+    int16x8_t row6_lz = vclzq_s16(abs_row6);
+    int16x8_t row7_lz = vclzq_s16(abs_row7);
+    /* Narrow leading zero count to 8 bits. */
+    uint8x16_t row01_lz = vuzp1q_u8(vreinterpretq_u8_s16(row0_lz),
+                                    vreinterpretq_u8_s16(row1_lz));
+    uint8x16_t row23_lz = vuzp1q_u8(vreinterpretq_u8_s16(row2_lz),
+                                    vreinterpretq_u8_s16(row3_lz));
+    uint8x16_t row45_lz = vuzp1q_u8(vreinterpretq_u8_s16(row4_lz),
+                                    vreinterpretq_u8_s16(row5_lz));
+    uint8x16_t row67_lz = vuzp1q_u8(vreinterpretq_u8_s16(row6_lz),
+                                    vreinterpretq_u8_s16(row7_lz));
+    /* Compute nbits needed to specify magnitude of each coefficient. */
+    uint8x16_t row01_nbits = vsubq_u8(vdupq_n_u8(16), row01_lz);
+    uint8x16_t row23_nbits = vsubq_u8(vdupq_n_u8(16), row23_lz);
+    uint8x16_t row45_nbits = vsubq_u8(vdupq_n_u8(16), row45_lz);
+    uint8x16_t row67_nbits = vsubq_u8(vdupq_n_u8(16), row67_lz);
+    /* Store nbits. */
+    vst1q_u8(block_nbits + 0 * DCTSIZE, row01_nbits);
+    vst1q_u8(block_nbits + 2 * DCTSIZE, row23_nbits);
+    vst1q_u8(block_nbits + 4 * DCTSIZE, row45_nbits);
+    vst1q_u8(block_nbits + 6 * DCTSIZE, row67_nbits);
+    /* Mask bits not required to specify sign and amplitude of diff. */
+    uint16x8_t row1_mask = vshlq_u16(vcltzq_s16(row1), vnegq_s16(row1_lz));
+    uint16x8_t row2_mask = vshlq_u16(vcltzq_s16(row2), vnegq_s16(row2_lz));
+    uint16x8_t row3_mask = vshlq_u16(vcltzq_s16(row3), vnegq_s16(row3_lz));
+    uint16x8_t row4_mask = vshlq_u16(vcltzq_s16(row4), vnegq_s16(row4_lz));
+    uint16x8_t row5_mask = vshlq_u16(vcltzq_s16(row5), vnegq_s16(row5_lz));
+    uint16x8_t row6_mask = vshlq_u16(vcltzq_s16(row6), vnegq_s16(row6_lz));
+    uint16x8_t row7_mask = vshlq_u16(vcltzq_s16(row7), vnegq_s16(row7_lz));
+    /* diff = abs(coeff) ^ sign(coeff) [no-op for positive coefficients] */
+    uint16x8_t row1_diff = veorq_u16(vreinterpretq_u16_s16(abs_row1),
+                                     row1_mask);
+    uint16x8_t row2_diff = veorq_u16(vreinterpretq_u16_s16(abs_row2),
+                                     row2_mask);
+    uint16x8_t row3_diff = veorq_u16(vreinterpretq_u16_s16(abs_row3),
+                                     row3_mask);
+    uint16x8_t row4_diff = veorq_u16(vreinterpretq_u16_s16(abs_row4),
+                                     row4_mask);
+    uint16x8_t row5_diff = veorq_u16(vreinterpretq_u16_s16(abs_row5),
+                                     row5_mask);
+    uint16x8_t row6_diff = veorq_u16(vreinterpretq_u16_s16(abs_row6),
+                                     row6_mask);
+    uint16x8_t row7_diff = veorq_u16(vreinterpretq_u16_s16(abs_row7),
+                                     row7_mask);
+    /* Store diff bits. */
+    vst1q_u16(block_diff + 0 * DCTSIZE, row0_diff);
+    vst1q_u16(block_diff + 1 * DCTSIZE, row1_diff);
+    vst1q_u16(block_diff + 2 * DCTSIZE, row2_diff);
+    vst1q_u16(block_diff + 3 * DCTSIZE, row3_diff);
+    vst1q_u16(block_diff + 4 * DCTSIZE, row4_diff);
+    vst1q_u16(block_diff + 5 * DCTSIZE, row5_diff);
+    vst1q_u16(block_diff + 6 * DCTSIZE, row6_diff);
+    vst1q_u16(block_diff + 7 * DCTSIZE, row7_diff);
+
+    while (bitmap != 0) {
+      r = BUILTIN_CLZLL(bitmap);
+      i += r;
+      bitmap <<= r;
+      nbits = block_nbits[i];
+      diff = block_diff[i];
+      while (r > 15) {
+        /* If run length > 15, emit special run-length-16 codes. */
+        PUT_BITS(code_0xf0, size_0xf0)
+        r -= 16;
+      }
+      /* Emit Huffman symbol for run length / number of bits. (F.1.2.2.1) */
+      unsigned int rs = (r << 4) + nbits;
+      PUT_CODE(actbl->ehufco[rs], actbl->ehufsi[rs], diff)
+      i++;
+      bitmap <<= 1;
+    }
+  } else if (bitmap != 0) {
+    uint16_t block_abs[DCTSIZE2];
+    /* Compute and store absolute value of coefficients. */
+    int16x8_t abs_row1 = vabsq_s16(row1);
+    int16x8_t abs_row2 = vabsq_s16(row2);
+    int16x8_t abs_row3 = vabsq_s16(row3);
+    int16x8_t abs_row4 = vabsq_s16(row4);
+    int16x8_t abs_row5 = vabsq_s16(row5);
+    int16x8_t abs_row6 = vabsq_s16(row6);
+    int16x8_t abs_row7 = vabsq_s16(row7);
+    vst1q_u16(block_abs + 0 * DCTSIZE, vreinterpretq_u16_s16(abs_row0));
+    vst1q_u16(block_abs + 1 * DCTSIZE, vreinterpretq_u16_s16(abs_row1));
+    vst1q_u16(block_abs + 2 * DCTSIZE, vreinterpretq_u16_s16(abs_row2));
+    vst1q_u16(block_abs + 3 * DCTSIZE, vreinterpretq_u16_s16(abs_row3));
+    vst1q_u16(block_abs + 4 * DCTSIZE, vreinterpretq_u16_s16(abs_row4));
+    vst1q_u16(block_abs + 5 * DCTSIZE, vreinterpretq_u16_s16(abs_row5));
+    vst1q_u16(block_abs + 6 * DCTSIZE, vreinterpretq_u16_s16(abs_row6));
+    vst1q_u16(block_abs + 7 * DCTSIZE, vreinterpretq_u16_s16(abs_row7));
+    /* Compute diff bits (without nbits mask) and store. */
+    uint16x8_t row1_diff = veorq_u16(vreinterpretq_u16_s16(abs_row1),
+                                     vcltzq_s16(row1));
+    uint16x8_t row2_diff = veorq_u16(vreinterpretq_u16_s16(abs_row2),
+                                     vcltzq_s16(row2));
+    uint16x8_t row3_diff = veorq_u16(vreinterpretq_u16_s16(abs_row3),
+                                     vcltzq_s16(row3));
+    uint16x8_t row4_diff = veorq_u16(vreinterpretq_u16_s16(abs_row4),
+                                     vcltzq_s16(row4));
+    uint16x8_t row5_diff = veorq_u16(vreinterpretq_u16_s16(abs_row5),
+                                     vcltzq_s16(row5));
+    uint16x8_t row6_diff = veorq_u16(vreinterpretq_u16_s16(abs_row6),
+                                     vcltzq_s16(row6));
+    uint16x8_t row7_diff = veorq_u16(vreinterpretq_u16_s16(abs_row7),
+                                     vcltzq_s16(row7));
+    vst1q_u16(block_diff + 0 * DCTSIZE, row0_diff);
+    vst1q_u16(block_diff + 1 * DCTSIZE, row1_diff);
+    vst1q_u16(block_diff + 2 * DCTSIZE, row2_diff);
+    vst1q_u16(block_diff + 3 * DCTSIZE, row3_diff);
+    vst1q_u16(block_diff + 4 * DCTSIZE, row4_diff);
+    vst1q_u16(block_diff + 5 * DCTSIZE, row5_diff);
+    vst1q_u16(block_diff + 6 * DCTSIZE, row6_diff);
+    vst1q_u16(block_diff + 7 * DCTSIZE, row7_diff);
+
+    /* Same as above but must mask diff bits and compute nbits on demand. */
+    while (bitmap != 0) {
+      r = BUILTIN_CLZLL(bitmap);
+      i += r;
+      bitmap <<= r;
+      lz = BUILTIN_CLZ(block_abs[i]);
+      nbits = 32 - lz;
+      diff = ((unsigned int)block_diff[i] << lz) >> lz;
+      while (r > 15) {
+        /* If run length > 15, emit special run-length-16 codes. */
+        PUT_BITS(code_0xf0, size_0xf0)
+        r -= 16;
+      }
+      /* Emit Huffman symbol for run length / number of bits. (F.1.2.2.1) */
+      unsigned int rs = (r << 4) + nbits;
+      PUT_CODE(actbl->ehufco[rs], actbl->ehufsi[rs], diff)
+      i++;
+      bitmap <<= 1;
+    }
+  }
+
+  /* If the last coefficient(s) were zero, emit an end-of-block (EOB) code.
+   * The value of RS for the EOB code is 0.
+   */
+  if (i != 64) {
+    PUT_BITS(actbl->ehufco[0], actbl->ehufsi[0])
+  }
+
+  state_ptr->cur.put_buffer = put_buffer;
+  state_ptr->cur.free_bits = free_bits;
+
+  return buffer;
+}
--- a/3rdparty/libjpeg-turbo/src/simd/arm/aarch64/jsimd.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/aarch64/jsimd.c
--- a/3rdparty/libjpeg-turbo/src/simd/arm/aarch64/jsimd_neon.S
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/aarch64/jsimd_neon.S
--- a/3rdparty/libjpeg-turbo/src/simd/arm/align.h
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/align.h
+/*
+ * Copyright (C) 2020, Arm Limited.  All Rights Reserved.
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/* How to obtain memory alignment for structures and variables */
+#if defined(_MSC_VER)
+#define ALIGN(alignment)  __declspec(align(alignment))
+#elif defined(__clang__) || defined(__GNUC__)
+#define ALIGN(alignment)  __attribute__((aligned(alignment)))
+#else
+#error "Unknown compiler"
+#endif
--- a/3rdparty/libjpeg-turbo/src/simd/arm/jccolor-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jccolor-neon.c
--- a/3rdparty/libjpeg-turbo/src/simd/arm/jcgray-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jcgray-neon.c
--- a/3rdparty/libjpeg-turbo/src/simd/arm/jcgryext-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jcgryext-neon.c
--- a/3rdparty/libjpeg-turbo/src/simd/arm/jchuff.h
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jchuff.h
--- a/3rdparty/libjpeg-turbo/src/simd/arm/jcphuff-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jcphuff-neon.c
--- a/3rdparty/libjpeg-turbo/src/simd/arm/jcsample-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jcsample-neon.c
--- a/3rdparty/libjpeg-turbo/src/simd/arm/jdcolext-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jdcolext-neon.c
--- a/3rdparty/libjpeg-turbo/src/simd/arm/jdcolor-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jdcolor-neon.c
--- a/3rdparty/libjpeg-turbo/src/simd/arm/jdmerge-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jdmerge-neon.c
--- a/3rdparty/libjpeg-turbo/src/simd/arm/jdmrgext-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jdmrgext-neon.c
--- a/3rdparty/libjpeg-turbo/src/simd/arm/jdsample-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jdsample-neon.c
--- a/3rdparty/libjpeg-turbo/src/simd/arm/jfdctfst-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jfdctfst-neon.c
--- a/3rdparty/libjpeg-turbo/src/simd/arm/jfdctint-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jfdctint-neon.c
--- a/3rdparty/libjpeg-turbo/src/simd/arm/jidctfst-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jidctfst-neon.c
--- a/3rdparty/libjpeg-turbo/src/simd/arm/jidctint-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jidctint-neon.c
--- a/3rdparty/libjpeg-turbo/src/simd/arm/jidctred-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jidctred-neon.c
--- a/3rdparty/libjpeg-turbo/src/simd/arm/jquanti-neon.c
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/jquanti-neon.c
--- a/3rdparty/libjpeg-turbo/src/simd/arm/neon-compat.h.in
+++ b/3rdparty/libjpeg-turbo/src/simd/arm/neon-compat.h.in
+/*
+ * Copyright (C) 2020, D. R. Commander.  All Rights Reserved.
+ * Copyright (C) 2020-2021, Arm Limited.  All Rights Reserved.
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty.  In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+#cmakedefine HAVE_VLD1_S16_X3
+#cmakedefine HAVE_VLD1_U16_X2
+#cmakedefine HAVE_VLD1Q_U8_X4
+
+/* Define compiler-independent count-leading-zeros and byte-swap macros */
+#if defined(_MSC_VER) && !defined(__clang__)
+#define BUILTIN_CLZ(x)  _CountLeadingZeros(x)
+#define BUILTIN_CLZLL(x)  _CountLeadingZeros64(x)
+#define BUILTIN_BSWAP64(x)  _byteswap_uint64(x)
+#elif defined(__clang__) || defined(__GNUC__)
+#define BUILTIN_CLZ(x)  __builtin_clz(x)
+#define BUILTIN_CLZLL(x)  __builtin_clzll(x)
+#define BUILTIN_BSWAP64(x)  __builtin_bswap64(x)
+#else
+#error "Unknown compiler"
+#endif
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jccolext-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jccolext-avx2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jccolext-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jccolext-mmx.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jccolext-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jccolext-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jccolor-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jccolor-avx2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jccolor-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jccolor-mmx.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jccolor-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jccolor-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jcgray-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jcgray-avx2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jcgray-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jcgray-mmx.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jcgray-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jcgray-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jcgryext-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jcgryext-avx2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jcgryext-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jcgryext-mmx.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jcgryext-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jcgryext-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jchuff-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jchuff-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jcphuff-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jcphuff-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jcsample-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jcsample-avx2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jcsample-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jcsample-mmx.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jcsample-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jcsample-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdcolext-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdcolext-avx2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdcolext-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdcolext-mmx.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdcolext-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdcolext-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdcolor-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdcolor-avx2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdcolor-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdcolor-mmx.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdcolor-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdcolor-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdmerge-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdmerge-avx2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdmerge-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdmerge-mmx.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdmerge-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdmerge-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdmrgext-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdmrgext-avx2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdmrgext-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdmrgext-mmx.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdmrgext-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdmrgext-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdsample-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdsample-avx2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdsample-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdsample-mmx.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jdsample-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jdsample-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jfdctflt-3dn.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jfdctflt-3dn.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jfdctflt-sse.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jfdctflt-sse.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jfdctfst-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jfdctfst-mmx.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jfdctfst-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jfdctfst-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jfdctint-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jfdctint-avx2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jfdctint-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jfdctint-mmx.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jfdctint-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jfdctint-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jidctflt-3dn.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jidctflt-3dn.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jidctflt-sse.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jidctflt-sse.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jidctflt-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jidctflt-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jidctfst-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jidctfst-mmx.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jidctfst-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jidctfst-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jidctint-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jidctint-avx2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jidctint-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jidctint-mmx.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jidctint-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jidctint-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jidctred-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jidctred-mmx.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jidctred-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jidctred-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jquant-3dn.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jquant-3dn.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jquant-mmx.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jquant-mmx.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jquant-sse.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jquant-sse.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jquantf-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jquantf-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jquanti-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jquanti-avx2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jquanti-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jquanti-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jsimd.c
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jsimd.c
--- a/3rdparty/libjpeg-turbo/src/simd/i386/jsimdcpu.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/i386/jsimdcpu.asm
--- a/3rdparty/libjpeg-turbo/src/simd/jsimd.h
+++ b/3rdparty/libjpeg-turbo/src/simd/jsimd.h
--- a/3rdparty/libjpeg-turbo/src/simd/mips/jsimd.c
+++ b/3rdparty/libjpeg-turbo/src/simd/mips/jsimd.c
--- a/3rdparty/libjpeg-turbo/src/simd/mips/jsimd_dspr2.S
+++ b/3rdparty/libjpeg-turbo/src/simd/mips/jsimd_dspr2.S
--- a/3rdparty/libjpeg-turbo/src/simd/mips/jsimd_dspr2_asm.h
+++ b/3rdparty/libjpeg-turbo/src/simd/mips/jsimd_dspr2_asm.h
--- a/3rdparty/libjpeg-turbo/src/simd/mips64/jccolext-mmi.c
+++ b/3rdparty/libjpeg-turbo/src/simd/mips64/jccolext-mmi.c
--- a/3rdparty/libjpeg-turbo/src/simd/mips64/jccolor-mmi.c
+++ b/3rdparty/libjpeg-turbo/src/simd/mips64/jccolor-mmi.c
--- a/3rdparty/libjpeg-turbo/src/simd/mips64/jcgray-mmi.c
+++ b/3rdparty/libjpeg-turbo/src/simd/mips64/jcgray-mmi.c
--- a/3rdparty/libjpeg-turbo/src/simd/mips64/jcgryext-mmi.c
+++ b/3rdparty/libjpeg-turbo/src/simd/mips64/jcgryext-mmi.c
--- a/3rdparty/libjpeg-turbo/src/simd/mips64/jcsample-mmi.c
+++ b/3rdparty/libjpeg-turbo/src/simd/mips64/jcsample-mmi.c
--- a/3rdparty/libjpeg-turbo/src/simd/mips64/jcsample.h
+++ b/3rdparty/libjpeg-turbo/src/simd/mips64/jcsample.h
--- a/3rdparty/libjpeg-turbo/src/simd/mips64/jdcolext-mmi.c
+++ b/3rdparty/libjpeg-turbo/src/simd/mips64/jdcolext-mmi.c
--- a/3rdparty/libjpeg-turbo/src/simd/mips64/jdcolor-mmi.c
+++ b/3rdparty/libjpeg-turbo/src/simd/mips64/jdcolor-mmi.c
--- a/3rdparty/libjpeg-turbo/src/simd/mips64/jdmerge-mmi.c
+++ b/3rdparty/libjpeg-turbo/src/simd/mips64/jdmerge-mmi.c
--- a/3rdparty/libjpeg-turbo/src/simd/mips64/jdmrgext-mmi.c
+++ b/3rdparty/libjpeg-turbo/src/simd/mips64/jdmrgext-mmi.c
--- a/3rdparty/libjpeg-turbo/src/simd/mips64/jdsample-mmi.c
+++ b/3rdparty/libjpeg-turbo/src/simd/mips64/jdsample-mmi.c
--- a/3rdparty/libjpeg-turbo/src/simd/mips64/jfdctfst-mmi.c
+++ b/3rdparty/libjpeg-turbo/src/simd/mips64/jfdctfst-mmi.c
--- a/3rdparty/libjpeg-turbo/src/simd/mips64/jfdctint-mmi.c
+++ b/3rdparty/libjpeg-turbo/src/simd/mips64/jfdctint-mmi.c
--- a/3rdparty/libjpeg-turbo/src/simd/mips64/jidctfst-mmi.c
+++ b/3rdparty/libjpeg-turbo/src/simd/mips64/jidctfst-mmi.c
--- a/3rdparty/libjpeg-turbo/src/simd/mips64/jidctint-mmi.c
+++ b/3rdparty/libjpeg-turbo/src/simd/mips64/jidctint-mmi.c
--- a/3rdparty/libjpeg-turbo/src/simd/mips64/jquanti-mmi.c
+++ b/3rdparty/libjpeg-turbo/src/simd/mips64/jquanti-mmi.c
--- a/3rdparty/libjpeg-turbo/src/simd/mips64/jsimd.c
+++ b/3rdparty/libjpeg-turbo/src/simd/mips64/jsimd.c
--- a/3rdparty/libjpeg-turbo/src/simd/mips64/jsimd_mmi.h
+++ b/3rdparty/libjpeg-turbo/src/simd/mips64/jsimd_mmi.h
--- a/3rdparty/libjpeg-turbo/src/simd/mips64/loongson-mmintrin.h
+++ b/3rdparty/libjpeg-turbo/src/simd/mips64/loongson-mmintrin.h
--- a/3rdparty/libjpeg-turbo/src/simd/nasm/jcolsamp.inc
+++ b/3rdparty/libjpeg-turbo/src/simd/nasm/jcolsamp.inc
--- a/3rdparty/libjpeg-turbo/src/simd/nasm/jdct.inc
+++ b/3rdparty/libjpeg-turbo/src/simd/nasm/jdct.inc
--- a/3rdparty/libjpeg-turbo/src/simd/nasm/jsimdcfg.inc
+++ b/3rdparty/libjpeg-turbo/src/simd/nasm/jsimdcfg.inc
--- a/3rdparty/libjpeg-turbo/src/simd/nasm/jsimdcfg.inc.h
+++ b/3rdparty/libjpeg-turbo/src/simd/nasm/jsimdcfg.inc.h
--- a/3rdparty/libjpeg-turbo/src/simd/nasm/jsimdext.inc
+++ b/3rdparty/libjpeg-turbo/src/simd/nasm/jsimdext.inc
--- a/3rdparty/libjpeg-turbo/src/simd/powerpc/jccolext-altivec.c
+++ b/3rdparty/libjpeg-turbo/src/simd/powerpc/jccolext-altivec.c
--- a/3rdparty/libjpeg-turbo/src/simd/powerpc/jccolor-altivec.c
+++ b/3rdparty/libjpeg-turbo/src/simd/powerpc/jccolor-altivec.c
--- a/3rdparty/libjpeg-turbo/src/simd/powerpc/jcgray-altivec.c
+++ b/3rdparty/libjpeg-turbo/src/simd/powerpc/jcgray-altivec.c
--- a/3rdparty/libjpeg-turbo/src/simd/powerpc/jcgryext-altivec.c
+++ b/3rdparty/libjpeg-turbo/src/simd/powerpc/jcgryext-altivec.c
--- a/3rdparty/libjpeg-turbo/src/simd/powerpc/jcsample-altivec.c
+++ b/3rdparty/libjpeg-turbo/src/simd/powerpc/jcsample-altivec.c
--- a/3rdparty/libjpeg-turbo/src/simd/powerpc/jcsample.h
+++ b/3rdparty/libjpeg-turbo/src/simd/powerpc/jcsample.h
--- a/3rdparty/libjpeg-turbo/src/simd/powerpc/jdcolext-altivec.c
+++ b/3rdparty/libjpeg-turbo/src/simd/powerpc/jdcolext-altivec.c
--- a/3rdparty/libjpeg-turbo/src/simd/powerpc/jdcolor-altivec.c
+++ b/3rdparty/libjpeg-turbo/src/simd/powerpc/jdcolor-altivec.c
--- a/3rdparty/libjpeg-turbo/src/simd/powerpc/jdmerge-altivec.c
+++ b/3rdparty/libjpeg-turbo/src/simd/powerpc/jdmerge-altivec.c
--- a/3rdparty/libjpeg-turbo/src/simd/powerpc/jdmrgext-altivec.c
+++ b/3rdparty/libjpeg-turbo/src/simd/powerpc/jdmrgext-altivec.c
--- a/3rdparty/libjpeg-turbo/src/simd/powerpc/jdsample-altivec.c
+++ b/3rdparty/libjpeg-turbo/src/simd/powerpc/jdsample-altivec.c
--- a/3rdparty/libjpeg-turbo/src/simd/powerpc/jfdctfst-altivec.c
+++ b/3rdparty/libjpeg-turbo/src/simd/powerpc/jfdctfst-altivec.c
--- a/3rdparty/libjpeg-turbo/src/simd/powerpc/jfdctint-altivec.c
+++ b/3rdparty/libjpeg-turbo/src/simd/powerpc/jfdctint-altivec.c
--- a/3rdparty/libjpeg-turbo/src/simd/powerpc/jidctfst-altivec.c
+++ b/3rdparty/libjpeg-turbo/src/simd/powerpc/jidctfst-altivec.c
--- a/3rdparty/libjpeg-turbo/src/simd/powerpc/jidctint-altivec.c
+++ b/3rdparty/libjpeg-turbo/src/simd/powerpc/jidctint-altivec.c
--- a/3rdparty/libjpeg-turbo/src/simd/powerpc/jquanti-altivec.c
+++ b/3rdparty/libjpeg-turbo/src/simd/powerpc/jquanti-altivec.c
--- a/3rdparty/libjpeg-turbo/src/simd/powerpc/jsimd.c
+++ b/3rdparty/libjpeg-turbo/src/simd/powerpc/jsimd.c
--- a/3rdparty/libjpeg-turbo/src/simd/powerpc/jsimd_altivec.h
+++ b/3rdparty/libjpeg-turbo/src/simd/powerpc/jsimd_altivec.h
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jccolext-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jccolext-avx2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jccolext-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jccolext-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jccolor-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jccolor-avx2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jccolor-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jccolor-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jcgray-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jcgray-avx2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jcgray-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jcgray-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jcgryext-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jcgryext-avx2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jcgryext-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jcgryext-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jchuff-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jchuff-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jcphuff-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jcphuff-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jcsample-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jcsample-avx2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jcsample-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jcsample-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jdcolext-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jdcolext-avx2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jdcolext-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jdcolext-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jdcolor-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jdcolor-avx2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jdcolor-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jdcolor-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jdmerge-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jdmerge-avx2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jdmerge-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jdmerge-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jdmrgext-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jdmrgext-avx2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jdmrgext-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jdmrgext-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jdsample-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jdsample-avx2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jdsample-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jdsample-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jfdctflt-sse.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jfdctflt-sse.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jfdctfst-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jfdctfst-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jfdctint-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jfdctint-avx2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jfdctint-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jfdctint-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jidctflt-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jidctflt-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jidctfst-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jidctfst-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jidctint-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jidctint-avx2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jidctint-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jidctint-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jidctred-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jidctred-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jquantf-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jquantf-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jquanti-avx2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jquanti-avx2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jquanti-sse2.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jquanti-sse2.asm
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jsimd.c
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jsimd.c
--- a/3rdparty/libjpeg-turbo/src/simd/x86_64/jsimdcpu.asm
+++ b/3rdparty/libjpeg-turbo/src/simd/x86_64/jsimdcpu.asm