This commit is contained in:
2026-06-14 19:09:18 +01:00
parent 14bd1a9271
commit 13fa90a0e9
3958 changed files with 999286 additions and 4 deletions
+65
View File
@@ -0,0 +1,65 @@
# SPDX-License-Identifier: Apache-2.0
# ----------------------------------------------------------------------------
# Copyright 2021 Arm Limited
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy
# of the License at:
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations
# under the License.
# ----------------------------------------------------------------------------
# CMake configuration
cmake_minimum_required(VERSION 3.15)
include(ExternalProject)
project(astcencoder_example VERSION 1.1.0)

# Import astcenc as an external project, then read back the directories that
# are needed to wire it into the example application.
#
# By default astcenc makes a native build, which supports the highest level of
# SIMD exposed by the compiler when using default compiler flags. To force a
# specific instruction set add a single SIMD enable to CMAKE_CACHE_ARGS, and
# change the library named in target_link_libraries() below to match:
#
#   * "-DASTCENC_ISA_SSE2:String=ON"  and link against "astcenc-sse2-static"
#   * "-DASTCENC_ISA_SSE41:String=ON" and link against "astcenc-sse4.1-static"
#   * "-DASTCENC_ISA_AVX2:String=ON"  and link against "astcenc-avx2-static"
#   * "-DASTCENC_ISA_NEON:String=ON"  and link against "astcenc-neon-static"
ExternalProject_Add(
  astcencoder
  GIT_REPOSITORY https://github.com/ARM-software/astc-encoder
  GIT_TAG main
  CMAKE_CACHE_ARGS
    -DASTCENC_CLI:STRING=OFF
    -DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
  INSTALL_COMMAND ""
)

# Defines SOURCE_DIR and BINARY_DIR for the external project
ExternalProject_Get_Property(astcencoder SOURCE_DIR BINARY_DIR)

# The example executable
add_executable(astcenc_example astc_api_example.cpp)

# The external project must be fetched and built before the example
add_dependencies(astcenc_example astcencoder)

# Headers come from the Source directory of the astcenc checkout
target_include_directories(
  astcenc_example
  PRIVATE
    "${SOURCE_DIR}/Source"
)

# The static library is searched for in the Source directory of the astcenc
# build tree
target_link_directories(
  astcenc_example
  PRIVATE
    "${BINARY_DIR}/Source"
)

target_link_libraries(
  astcenc_example
  PRIVATE
    astcenc-native-static
)
+55
View File
@@ -0,0 +1,55 @@
# Library usage example
This is a minimal example of using the astcenc codec as a library in another
project. This sample shows:
* How to include astcenc as an external project CMake dependency.
* How to use the API to compress and decompress an image.
For the sake of simplicity the example application uses fixed compression
settings, reading an uncompressed LDR image, compressing using 6x6 blocks at
medium quality, and then decompressing and writing the decompressed image back
to disk as a PNG file.
## Building
:warning: For the sake of simplicity the example CMake project uses the CMake
`ExternalProject` mechanism to import the astcenc project from GitHub. This is
trivial to integrate, but requires network access during the build to pull the
astcenc project.
Most users will want to store a copy of astcenc in a project sub-directory,
e.g. by using git submodules, and then use `add_subdirectory()` to include the
project in their build. This allows the user to directly use the astcenc core
library as a link requirement via `target_link_libraries()`, without the
additional plumbing that `ExternalProject` requires.
### Linux and macOS
From the `./Utils/Example` directory.
```
mkdir build
cd build
cmake -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release ..
make -j8
```
### Windows
From the `./Utils/Example` directory, in a Visual Studio command prompt.
```
mkdir build
cd build
cmake -G "NMake Makefiles" -DCMAKE_BUILD_TYPE=Release ..
nmake
```
## Running
From the build directory above.
```
astcenc_example <input.png> <output.png>
```
@@ -0,0 +1,156 @@
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2021-2024 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
// ----------------------------------------------------------------------------
// This is a minimal example of using the astcenc library.
//
// This sample shows how to include the astcenc library in your CMake project
// as an external dependency, and how to compress and decompress images using
// the C library API.
//
// For the sake of clarity the command line exposed by the sample is
// minimalistic, and the compression uses a fixed set of options, but the code
// is commented to indicate where extension would be possible. Errors are
// detected and logged, but resources are not cleaned up on error paths to keep
// the sample control path simple, so resources will leak on error.
#include <stdio.h>
#include "astcenc.h"
#define STB_IMAGE_IMPLEMENTATION
#include "ThirdParty/stb_image.h"
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "ThirdParty/stb_image_write.h"
/**
 * @brief Compress an LDR image to ASTC, decompress it again, and save the result.
 *
 * The compressor settings are hard-coded: a single thread, 6x6x1 blocks, the
 * LDR profile, the medium quality preset, and an identity RGBA swizzle.
 *
 * @param argc   Must be 3.
 * @param argv   argv[1] is the source image path, argv[2] is the output PNG path.
 *
 * @return 0 on success, or 1 if the command line is invalid or any load,
 *         codec, or store step fails.
 */
int main(int argc, char **argv)
{
    // Parse command line
    if (argc != 3)
    {
        printf("Usage:\n"
               "   %s <source> <dest>\n\n"
               "   <source> : Uncompressed LDR source image.\n"
               "   <dest>   : Uncompressed LDR destination image (png).\n"
               , argv[0]);
        return 1;
    }

    // ------------------------------------------------------------------------
    // For the purposes of this sample we hard-code the compressor settings
    static const unsigned int thread_count = 1;
    static const unsigned int block_x = 6;
    static const unsigned int block_y = 6;
    static const unsigned int block_z = 1;
    static const astcenc_profile profile = ASTCENC_PRF_LDR;
    static const float quality = ASTCENC_PRE_MEDIUM;
    static const astcenc_swizzle swizzle {
        ASTCENC_SWZ_R, ASTCENC_SWZ_G, ASTCENC_SWZ_B, ASTCENC_SWZ_A
    };

    // ------------------------------------------------------------------------
    // Load input image, forcing 4 components
    int image_x, image_y, image_c;
    uint8_t *image_data = (uint8_t*)stbi_load(argv[1], &image_x, &image_y, &image_c, 4);
    if (!image_data)
    {
        printf("Failed to load image \"%s\"\n", argv[1]);
        return 1;
    }

    // Compute the number of ASTC blocks in each dimension, rounding up so that
    // partial blocks at the right and bottom edges are included
    unsigned int block_count_x = (image_x + block_x - 1) / block_x;
    unsigned int block_count_y = (image_y + block_y - 1) / block_y;

    // ------------------------------------------------------------------------
    // Initialize the default configuration for the block size and quality
    astcenc_config config;
    astcenc_error status;
    status = astcenc_config_init(profile, block_x, block_y, block_z, quality, 0, &config);
    if (status != ASTCENC_SUCCESS)
    {
        printf("ERROR: Codec config init failed: %s\n", astcenc_get_error_string(status));
        return 1;
    }

    // ... power users can customize any config settings after calling
    // config_init() and before calling context alloc().

    // ------------------------------------------------------------------------
    // Create a context based on the configuration
    astcenc_context* context;
    status = astcenc_context_alloc(&config, thread_count, &context);
    if (status != ASTCENC_SUCCESS)
    {
        printf("ERROR: Codec context alloc failed: %s\n", astcenc_get_error_string(status));
        return 1;
    }

    // ------------------------------------------------------------------------
    // Compress the image
    astcenc_image image;
    image.dim_x = image_x;
    image.dim_y = image_y;
    image.dim_z = 1;
    image.data_type = ASTCENC_TYPE_U8;
    uint8_t* slices = image_data;
    image.data = reinterpret_cast<void**>(&slices);

    // Space needed for 16 bytes of output per compressed block; widen before
    // multiplying so the product is not computed in unsigned int
    size_t comp_len = static_cast<size_t>(block_count_x) * block_count_y * 16;
    uint8_t* comp_data = new uint8_t[comp_len];

    status = astcenc_compress_image(context, &image, &swizzle, comp_data, comp_len, 0);
    if (status != ASTCENC_SUCCESS)
    {
        printf("ERROR: Codec compress failed: %s\n", astcenc_get_error_string(status));
        return 1;
    }

    // ... the comp_data array contains the raw compressed data you would pass
    // to the graphics API, or pack into a wrapper format such as a KTX file.

    // If using multithreaded compression to sequentially compress multiple
    // images you should reuse the same context, calling the function
    // astcenc_compress_reset() between each image in the series.

    // ------------------------------------------------------------------------
    // Decompress the image
    // Note we just reuse the image structure to store the output here ...
    status = astcenc_decompress_image(context, comp_data, comp_len, &image, &swizzle, 0);
    if (status != ASTCENC_SUCCESS)
    {
        printf("ERROR: Codec decompress failed: %s\n", astcenc_get_error_string(status));
        return 1;
    }

    // If using multithreaded decompression to sequentially decompress multiple
    // images you should reuse the same context, calling the function
    // astcenc_decompress_reset() between each image in the series.

    // ------------------------------------------------------------------------
    // Store the result back to disk; stbi_write_png() returns 0 on failure
    int exit_code = 0;
    if (!stbi_write_png(argv[2], image_x, image_y, 4, image_data, 4 * image_x))
    {
        printf("ERROR: Failed to write image \"%s\"\n", argv[2]);
        exit_code = 1;
    }

    // ------------------------------------------------------------------------
    // Cleanup library resources
    stbi_image_free(image_data);
    astcenc_context_free(context);
    delete[] comp_data;

    return exit_code;
}
+298
View File
@@ -0,0 +1,298 @@
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2021-2024 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
// ----------------------------------------------------------------------------
// This is a utility tool to test blend modes.
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "astcenc_mathlib.h"
#define STB_IMAGE_IMPLEMENTATION
#include "ThirdParty/stb_image.h"
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "ThirdParty/stb_image_write.h"
/**
 * @brief Convert an sRGB gamma-encoded value to a linear value.
 *
 * Inputs at or below 0.04045 use the linear segment of the transfer function,
 * all other inputs use the power-law segment.
 *
 * @param a   The sRGB encoded value.
 *
 * @return The linearized value.
 */
static float srgb_to_linear(
    float a
) {
    return (a <= 0.04045f)
         ? a * (1.0f / 12.92f)
         : powf((a + 0.055f) * (1.0f / 1.055f), 2.4f);
}
/**
 * @brief Convert a linear value to an sRGB gamma-encoded value.
 *
 * Inputs at or below 0.0031308 use the linear segment of the transfer
 * function, all other inputs use the power-law segment.
 *
 * @param a   The linear value.
 *
 * @return The gamma encoded value.
 */
static float linear_to_srgb(
    float a
) {
    return (a <= 0.0031308f)
         ? a * 12.92f
         : 1.055f * powf(a, 1.0f / 2.4f) - 0.055f;
}
int main(int argc, char **argv)
{
// Parse command line
if (argc != 6)
{
printf("Usage: astc_blend_test <source> <dest> <format> <blend_mode> <filter>\n");
exit(1);
}
const char* src_file = argv[1];
const char* dst_file = argv[2];
bool use_linear = false;
if (!strcmp(argv[3], "linear"))
{
use_linear = true;
}
else if (!strcmp(argv[3], "srgb"))
{
use_linear = false;
}
else
{
printf("<format> must be either 'linear' or 'srgb'\n");
exit(1);
}
bool use_post_blend = false;
if (!strcmp(argv[4], "post"))
{
use_post_blend = true;
}
else if (!strcmp(argv[4], "pre"))
{
use_post_blend = false;
}
else
{
printf("<blend_mode> must be either 'post' or 'pre'\n");
exit(1);
}
bool use_filter = false;
if (!strcmp(argv[5], "on"))
{
use_filter = true;
}
else if (!strcmp(argv[5], "off"))
{
use_filter = false;
}
else
{
printf("<filter> must be either 'on' or 'off'\n");
exit(1);
}
// Load the input image
int dim_x;
int dim_y;
const uint8_t* data_in = stbi_load(src_file, &dim_x, &dim_y, nullptr, 4);
if (!data_in)
{
printf("ERROR: Failed to load input image.\n");
exit(1);
}
// Allocate the output image
uint8_t* data_out = (uint8_t*)malloc(4 * dim_y * dim_x);
if (!data_out)
{
printf("ERROR: Failed to allocate output image.\n");
exit(1);
}
// For each pixel blending and filtering
if (!use_filter)
{
for (int y = 0; y < dim_y; y++)
{
const uint8_t* row_in = data_in + (4 * dim_x * y);
uint8_t* row_out = data_out + (4 * dim_x * y);
for (int x = 0; x < dim_x; x++)
{
const uint8_t* pixel_in = row_in + 4 * x;
uint8_t* pixel_out = row_out + 4 * x;
float r_src = static_cast<float>(pixel_in[0]) / 255.0f;
float g_src = static_cast<float>(pixel_in[1]) / 255.0f;
float b_src = static_cast<float>(pixel_in[2]) / 255.0f;
float a_src = static_cast<float>(pixel_in[3]) / 255.0f;
if (use_linear == false)
{
r_src = srgb_to_linear(r_src);
g_src = srgb_to_linear(g_src);
b_src = srgb_to_linear(b_src);
}
float r_dst = 0.53f;
float g_dst = 0.53f;
float b_dst = 0.53f;
float r_out;
float g_out;
float b_out;
float a_out;
// Post-multiply blending
if (use_post_blend)
{
r_out = (r_dst * (1.0f - a_src)) + (r_src * a_src);
g_out = (g_dst * (1.0f - a_src)) + (g_src * a_src);
b_out = (b_dst * (1.0f - a_src)) + (b_src * a_src);
a_out = 1.0f;
}
// Pre-multiply blending
else
{
r_out = (r_dst * (1.0f - a_src)) + (r_src * 1.0f);
g_out = (g_dst * (1.0f - a_src)) + (g_src * 1.0f);
b_out = (b_dst * (1.0f - a_src)) + (b_src * 1.0f);
a_out = 1.0f;
}
// Clamp color between 0 and 1.0f
r_out = astc::min(r_out, 1.0f);
g_out = astc::min(g_out, 1.0f);
b_out = astc::min(b_out, 1.0f);
if (use_linear == false)
{
r_out = linear_to_srgb(r_out);
g_out = linear_to_srgb(g_out);
b_out = linear_to_srgb(b_out);
}
pixel_out[0] = (uint8_t)(r_out * 255.0f);
pixel_out[1] = (uint8_t)(g_out * 255.0f);
pixel_out[2] = (uint8_t)(b_out * 255.0f);
pixel_out[3] = (uint8_t)(a_out * 255.0f);
}
}
}
else
{
for (int y = 0; y < dim_y - 1; y++)
{
const uint8_t* row_in_0 = data_in + (4 * dim_x * y);
const uint8_t* row_in_1 = data_in + (4 * dim_x * (y + 1));
uint8_t* row_out = data_out + (4 * (dim_x - 1) * y);
for (int x = 0; x < dim_x - 1; x++)
{
const uint8_t* pixel_in_00 = row_in_0 + 4 * x;
const uint8_t* pixel_in_01 = row_in_0 + 4 * (x + 1);
const uint8_t* pixel_in_10 = row_in_1 + 4 * x;
const uint8_t* pixel_in_11 = row_in_1 + 4 * (x + 1);
uint8_t* pixel_out = row_out + 4 * x;
// Bilinear filter with a half-pixel offset
float r_src = static_cast<float>(pixel_in_00[0] + pixel_in_01[0] + pixel_in_10[0] + pixel_in_11[0]) / (255.0f * 4.0f);
float g_src = static_cast<float>(pixel_in_00[1] + pixel_in_01[1] + pixel_in_10[1] + pixel_in_11[1]) / (255.0f * 4.0f);
float b_src = static_cast<float>(pixel_in_00[2] + pixel_in_01[2] + pixel_in_10[2] + pixel_in_11[2]) / (255.0f * 4.0f);
float a_src = static_cast<float>(pixel_in_00[3] + pixel_in_01[3] + pixel_in_10[3] + pixel_in_11[3]) / (255.0f * 4.0f);
if (use_linear == false)
{
r_src = srgb_to_linear(r_src);
g_src = srgb_to_linear(g_src);
b_src = srgb_to_linear(b_src);
}
float r_dst = 0.8f;
float g_dst = 1.0f;
float b_dst = 0.8f;
float r_out;
float g_out;
float b_out;
float a_out;
// Post-multiply blending
if (use_post_blend)
{
r_out = (r_dst * (1.0f - a_src)) + (r_src * a_src);
g_out = (g_dst * (1.0f - a_src)) + (g_src * a_src);
b_out = (b_dst * (1.0f - a_src)) + (b_src * a_src);
a_out = 1.0f;
}
// Pre-multiply blending
else
{
r_out = (r_dst * (1.0f - a_src)) + (r_src * 1.0f);
g_out = (g_dst * (1.0f - a_src)) + (g_src * 1.0f);
b_out = (b_dst * (1.0f - a_src)) + (b_src * 1.0f);
a_out = 1.0f;
}
// Clamp color between 0 and 1.0f
r_out = astc::min(r_out, 1.0f);
g_out = astc::min(g_out, 1.0f);
b_out = astc::min(b_out, 1.0f);
if (use_linear == false)
{
r_out = linear_to_srgb(r_out);
g_out = linear_to_srgb(g_out);
b_out = linear_to_srgb(b_out);
}
pixel_out[0] = (uint8_t)(r_out * 255.0f);
pixel_out[1] = (uint8_t)(g_out * 255.0f);
pixel_out[2] = (uint8_t)(b_out * 255.0f);
pixel_out[3] = (uint8_t)(a_out * 255.0f);
}
}
}
// Write out the result
if (!use_filter)
{
stbi_write_png(dst_file, dim_x, dim_y, 4, data_out, 4 * dim_x);
}
else
{
stbi_write_png(dst_file, dim_x - 1, dim_y - 1, 4, data_out, 4 * (dim_x - 1));
}
return 0;
}
+406
View File
@@ -0,0 +1,406 @@
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2023 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
// ----------------------------------------------------------------------------
// This is a utility tool to generate quant tables
#include <algorithm>
#include <array>
#include <bitset>
#include <cstdio>
#include <set>
/**
 * @brief The ASTC quantization methods.
 *
 * Note, the values here are used directly in the encoding in the format so do not rearrange.
 *
 * The numeric suffix of each enumerator is the number of quantization levels
 * that get_quant_level() returns for it.
 */
enum quant_method
{
QUANT_2 = 0,
QUANT_3 = 1,
QUANT_4 = 2,
QUANT_5 = 3,
QUANT_6 = 4,
QUANT_8 = 5,
QUANT_10 = 6,
QUANT_12 = 7,
QUANT_16 = 8,
QUANT_20 = 9,
QUANT_24 = 10,
QUANT_32 = 11,
QUANT_40 = 12,
QUANT_48 = 13,
QUANT_64 = 14,
QUANT_80 = 15,
QUANT_96 = 16,
QUANT_128 = 17,
QUANT_160 = 18,
QUANT_192 = 19,
QUANT_256 = 20
};
/**
 * @brief Get the number of quantization levels for a quantization method.
 *
 * @param method   The quantization method.
 *
 * @return The number of levels, or 0 if @c method is not a valid enumerator.
 */
static inline unsigned int get_quant_level(quant_method method)
{
    // Indexed by enumerator value, which runs contiguously from 0 to 20
    static const unsigned int level_counts[] {
          2,   3,   4,   5,   6,   8,  10,
         12,  16,  20,  24,  32,  40,  48,
         64,  80,  96, 128, 160, 192, 256
    };

    const unsigned int entry_count = sizeof(level_counts) / sizeof(level_counts[0]);
    const unsigned int index = static_cast<unsigned int>(method);

    if (index < entry_count)
    {
        return level_counts[index];
    }

    // Unreachable - the enum is fully described
    return 0;
}
/**
 * @brief The parameters used to generate the values of one quantization method.
 */
struct quant_config {
// The quantization method this entry describes
quant_method quant;
// The number of plain bits; the generator enumerates 1 << bits bit patterns
unsigned int bits;
// Non-zero if the value has a trit; the generator then enumerates D in 0..2
unsigned int trits;
// Non-zero if the value has a quint; only tested when trits is zero, and the
// generator then enumerates D in 0..4
unsigned int quints;
// The multiplier applied to the trit or quint value D
unsigned int C;
// masks[n] is added to the B term when bit n of the bit pattern is set
unsigned int masks[6];
};
/**
 * @brief The quantization methods to generate tables for, QUANT_6 to QUANT_256.
 *
 * Each entry lists, in order: the method, the bits / trits / quints counts,
 * the C multiplier, and the six per-bit masks. Entries that use only bits set
 * C and all masks to zero.
 */
const std::array<quant_config, 17> quant_configs {{
{
QUANT_6,
1, 1, 0,
204,
{
0b000000000,
0b000000000,
0b000000000,
0b000000000,
0b000000000,
0b000000000
}
}, {
QUANT_8,
3, 0, 0,
0,
{ 0 }
}, {
QUANT_10,
1, 0, 1,
113,
{
0b000000000,
0b000000000,
0b000000000,
0b000000000,
0b000000000,
0b000000000
}
}, {
QUANT_12,
2, 1, 0,
93,
{
0b000000000,
0b100010110,
0b000000000,
0b000000000,
0b000000000,
0b000000000
}
}, {
QUANT_16,
4, 0, 0,
0,
{ 0 }
}, {
QUANT_20,
2, 0, 1,
54,
{
0b000000000,
0b100001100,
0b000000000,
0b000000000,
0b000000000,
0b000000000
}
}, {
QUANT_24,
3, 1, 0,
44,
{
0b000000000,
0b010000101,
0b100001010,
0b000000000,
0b000000000,
0b000000000
}
}, {
QUANT_32,
5, 0, 0,
0,
{ 0 }
},
{
QUANT_40,
3, 0, 1,
26,
{
0b000000000,
0b010000010,
0b100000101,
0b000000000,
0b000000000,
0b000000000
}
}, {
QUANT_48,
4, 1, 0,
22,
{
0b000000000,
0b001000001,
0b010000010,
0b100000100,
0b000000000,
0b000000000
}
}, {
QUANT_64,
6, 0, 0,
0,
{ 0 }
}, {
QUANT_80,
4, 0, 1,
13,
{
0b000000000,
0b001000000,
0b010000001,
0b100000010,
0b000000000,
0b000000000
}
}, {
QUANT_96,
5, 1, 0,
11,
{
0b000000000,
0b000100000,
0b001000000,
0b010000001,
0b100000010,
0b000000000
}
}, {
QUANT_128,
7, 0, 0,
0,
{ 0 }
}, {
QUANT_160,
5, 0, 1,
6,
{
0b000000000,
0b000100000,
0b001000000,
0b010000000,
0b100000001,
0b000000000
}
}, {
QUANT_192,
6, 1, 0,
5,
{
0b000000000,
0b000010000,
0b000100000,
0b001000000,
0b010000000,
0b100000001
}
}, {
QUANT_256,
8, 0, 0,
0,
{ 0 }
}
}};
/**
 * @brief Generate the set of unquantized 8-bit values for a quantization method.
 *
 * Every combination of trit or quint value and bit pattern that the method
 * can encode is unpacked, and the unpacked value is inserted into @c set.
 *
 * @param config     The quantization method configuration.
 * @param[out] set   The set that receives the unpacked values; existing
 *                   entries are kept.
 */
void generate_unpacked_quant(
    const quant_config& config,
    std::set<unsigned int>& set
) {
    const unsigned int max_bits = 1 << config.bits;

    // Value has 1 trit or 1 quint, and N bits. Both cases unpack identically
    // and differ only in how many values D can take.
    if (config.trits || config.quints)
    {
        const unsigned int d_count = config.trits ? 3 : 5;

        for (unsigned int D = 0; D < d_count; D++)
        {
            for (unsigned int bits = 0; bits < max_bits; bits++)
            {
                // A replicates the lowest bit across all nine bit positions
                unsigned int A = (bits & 1) * 0b111111111;

                // B accumulates the mask of every bit that is set
                unsigned int B = 0;
                unsigned int bit = bits;
                for (const auto& mask_n: config.masks)
                {
                    unsigned int bit_n = bit & 1;
                    bit >>= 1;
                    B += bit_n * mask_n;
                }

                unsigned int T = D * config.C + B;
                T = T ^ A;
                T = (A & 0x80) | (T >> 2);
                set.insert(T);
            }
        }
    }
    // Value has N bits
    else
    {
        const int bit_count = static_cast<int>(config.bits);

        for (unsigned int bits = 0; bits < max_bits; bits++)
        {
            // Place the pattern in the top bits, then keep repeating it below
            // until all eight bits are filled
            unsigned int T = bits << (8 - bit_count);
            int bits_remaining = 8 - bit_count;

            while (bits_remaining > 0)
            {
                int shift = bits_remaining - bit_count;
                bits_remaining = shift;

                if (shift > 0)
                {
                    T |= bits << shift;
                }
                else
                {
                    // The last copy only partially fits; keep its top bits
                    T |= bits >> -shift;
                }
            }

            set.insert(T);
        }
    }
}
/**
 * @brief Print the nearest unpacked values for every 8-bit input value.
 *
 * For each input from 0 to 255 this finds the entries of @c set that are
 * closest to it, and prints the smallest and the largest of those equally
 * close entries. Output is formatted as 16 pairs per line.
 *
 * @param config   The quantization method configuration (not used).
 * @param set      The set of unpacked values to search.
 */
void generate_unquant_to_unpacked_quant(
    const quant_config& config,
    const std::set<unsigned int>& set
) {
    for (unsigned int value = 0; value < 256; value++)
    {
        unsigned int best_dist = 256;
        unsigned int nearest_lo = 256;
        unsigned int nearest_hi = 0;

        for (unsigned int candidate : set)
        {
            unsigned int dist = (candidate > value) ? (candidate - value)
                                                    : (value - candidate);

            // Further away than the current best
            if (dist > best_dist)
            {
                continue;
            }

            // Strictly closer, so restart the range at this candidate
            if (dist < best_dist)
            {
                best_dist = dist;
                nearest_lo = candidate;
                nearest_hi = candidate;
                continue;
            }

            // Equally close, so widen the range to include this candidate
            if (candidate < nearest_lo)
            {
                nearest_lo = candidate;
            }

            if (candidate > nearest_hi)
            {
                nearest_hi = candidate;
            }
        }

        const unsigned int column = value % 16;

        if (column == 0)
        {
            printf("\t\t");
        }

        printf("%3u, %3u", nearest_lo, nearest_hi);

        if (value != 255)
        {
            printf(", ");
        }

        if (column == 15)
        {
            printf("\n");
        }
    }
}
/**
 * @brief Print the color unquant-to-uquant lookup tables as C++ source.
 *
 * One table row is emitted for every entry in @c quant_configs.
 *
 * @return Always 0.
 */
int main(void)
{
    // Take the outer dimension from the config table so the two cannot drift
    printf("const uint8_t color_unquant_to_uquant_tables[%zu][512] {\n", quant_configs.size());

    for (const auto& config : quant_configs)
    {
        std::set<unsigned int> set;

        printf("\t{ // QUANT_%u\n", get_quant_level(config.quant));
        generate_unpacked_quant(config, set);
        generate_unquant_to_unpacked_quant(config, set);
        printf("\t},\n");
    }

    printf("};\n");
    return 0;
}
+173
View File
@@ -0,0 +1,173 @@
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2021 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
// ----------------------------------------------------------------------------
// This is a utility tool to encode HDR into RGBM, or decode RGBM into HDR.
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "astcenc_mathlib.h"
#define STB_IMAGE_IMPLEMENTATION
#include "ThirdParty/stb_image.h"
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "ThirdParty/stb_image_write.h"
#define MODE_ENCODE 0
#define MODE_DECODE 1
int main(int argc, char **argv)
{
// Parse command line
if (argc != 6)
{
printf("Usage: astc_rgbm_codec [-ch|-dh] <M> <low_clamp> <source> <dest>\n");
exit(1);
}
int opmode;
if (strcmp(argv[1], "-ch") == 0)
{
opmode = MODE_ENCODE;
}
else if (strcmp(argv[1], "-dh") == 0)
{
opmode = MODE_DECODE;
}
else
{
printf("ERROR: Bad operation mode\n");
exit(1);
}
float rgbm_multiplier = atof(argv[2]);
float low_clamp = atof(argv[3]);
const char* src_file = argv[4];
const char* dst_file = argv[5];
// Convert an HDR input file into an RGBM encoded LDR file
if (opmode == MODE_ENCODE)
{
// Load the input image
int dim_x;
int dim_y;
const float* data_in = stbi_loadf(src_file, &dim_x, &dim_y, nullptr, 4);
if (!data_in)
{
printf("ERROR: Failed to load input image.\n");
exit(1);
}
// Allocate the output image
uint8_t* data_out = (uint8_t*)malloc(4 * dim_y * dim_x);
if (!data_out)
{
printf("ERROR: Failed to allow output image.\n");
exit(1);
}
// For each pixel apply RGBM encoding
for (int y = 0; y < dim_y; y++)
{
const float* row_in = data_in + (4 * dim_x * y);
uint8_t* row_out = data_out + (4 * dim_x * y);
for (int x = 0; x < dim_x; x++)
{
const float* pixel_in = row_in + 4 * x;
uint8_t* pixel_out = row_out + 4 * x;
float r_in = pixel_in[0] / rgbm_multiplier;
float g_in = pixel_in[1] / rgbm_multiplier;
float b_in = pixel_in[2] / rgbm_multiplier;
float max_rgb = astc::max(r_in, g_in, b_in);
// Ensure we always round up to next largest M
float m_scale = astc::min(1.0f, ceil(max_rgb * 255.0f) / 255.0f);
// But keep well above zero to avoid clamps in the compressor
m_scale = astc::max(m_scale, low_clamp / 255.0f);
float r_scale = astc::min(1.0f, r_in / m_scale);
float g_scale = astc::min(1.0f, g_in / m_scale);
float b_scale = astc::min(1.0f, b_in / m_scale);
pixel_out[0] = (uint8_t)(r_scale * 255.0f);
pixel_out[1] = (uint8_t)(g_scale * 255.0f);
pixel_out[2] = (uint8_t)(b_scale * 255.0f);
pixel_out[3] = (uint8_t)(m_scale * 255.0f);
}
}
// Write out the result
stbi_write_png(dst_file, dim_x, dim_y, 4, data_out, 4 * dim_x);
}
// Convert an RGBM encoded LDR file into an HDR file
else
{
// Load the input image
int dim_x;
int dim_y;
const uint8_t* data_in = stbi_load(src_file, &dim_x, &dim_y, nullptr, 4);
if (!data_in)
{
printf("ERROR: Failed to load input image.\n");
exit(1);
}
// Allocate the output image
float* data_out = (float*)malloc(4 * dim_y * dim_x * sizeof(float));
if (!data_out)
{
printf("ERROR: Failed to allow output image.\n");
exit(1);
}
// For each pixel apply RGBM decoding
for (int y = 0; y < dim_y; y++)
{
const uint8_t* row_in = data_in + (4 * dim_x * y);
float* row_out = data_out + (4 * dim_x * y);
for (int x = 0; x < dim_x; x++)
{
const uint8_t* pixel_in = row_in + 4 * x;
float* pixel_out = row_out + 4 * x;
float r_scale = ((float)pixel_in[0]) / 255.0f;
float g_scale = ((float)pixel_in[1]) / 255.0f;
float b_scale = ((float)pixel_in[2]) / 255.0f;
float m_scale = ((float)pixel_in[3]) / 255.0f;
pixel_out[0] = r_scale * (m_scale * rgbm_multiplier);
pixel_out[1] = g_scale * (m_scale * rgbm_multiplier);
pixel_out[2] = b_scale * (m_scale * rgbm_multiplier);
pixel_out[3] = 1.0f;
}
}
// Write out the result
stbi_write_hdr(dst_file, dim_x, dim_y, 4, data_out);
}
return 0;
}
@@ -0,0 +1,203 @@
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2021 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
// ----------------------------------------------------------------------------
// Overview
// ========
//
// This is a utility tool to automatically generate single tile test vectors
// out of a larger test image. This tool takes three input images:
//
// - the uncompressed reference,
// - the known-good compressed reference,
// - a new compressed image.
//
// The two compressed images are compared block-by-block, and if any block
// differences are found the worst block is extracted from the uncompressed
// reference and written back to disk as a single tile output image.
//
// Limitations
// ===========
//
// This tool only currently supports 2D LDR images.
//
// Build
// =====
//
// g++ astc_test_autoextract.cpp -I../Source -o astc_test_autoextract
#include <stdio.h>
#include <stdlib.h>
#define STB_IMAGE_IMPLEMENTATION
#include "ThirdParty/stb_image.h"
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "ThirdParty/stb_image_write.h"
/**
 * @brief Compute the flat array index of one component in a 2D image.
 *
 * @param x_pix   The image width, in pixels.
 * @param y_idx   The row of the pixel.
 * @param x_idx   The column of the pixel.
 * @param chans   The number of components stored per pixel.
 * @param p_idx   The component within the pixel.
 *
 * @return The index into the image array.
 */
int pix(int x_pix, int y_idx, int x_idx, int chans, int p_idx)
{
    int pixel_index = (y_idx * x_pix) + x_idx;
    int component_base = pixel_index * chans;
    return component_base + p_idx;
}
int main(int argc, char **argv)
{
// Parse command line
if (argc < 6)
{
printf("Usage: astc_test_extract <blocksize> <ref> <good> <bad> <out>\n");
return 1;
}
int blockdim_x, blockdim_y;
if (sscanf(argv[1], "%dx%d", &blockdim_x, &blockdim_y) < 2)
{
printf("blocksize must be of form WxH; e.g. 8x4\n");
return 1;
}
// Load the original reference image
int ref_dim_x, ref_dim_y, ref_ncomp;
uint8_t* data_ref = (uint8_t*)stbi_load(argv[2], &ref_dim_x, &ref_dim_y, &ref_ncomp, 4);
if (!data_ref)
{
printf("Failed to load reference image.\n");
return 1;
}
// Load the good test image
int good_dim_x, good_dim_y, good_ncomp;
uint8_t* data_good = (uint8_t*)stbi_load(argv[3], &good_dim_x, &good_dim_y, &good_ncomp, 4);
if (!data_good)
{
printf("Failed to load good test image.\n");
return 1;
}
// Load the bad test image
int bad_dim_x, bad_dim_y, bad_ncomp;
uint8_t* data_bad = (uint8_t*)stbi_load(argv[4], &bad_dim_x, &bad_dim_y, &bad_ncomp, 4);
if (!data_bad)
{
printf("Failed to load bad test image.\n");
return 1;
}
if (ref_dim_x != good_dim_x || ref_dim_x != bad_dim_x ||
ref_dim_y != good_dim_y || ref_dim_y != bad_dim_y)
{
printf("Failed as images are different resolutions.\n");
return 1;
}
int x_blocks = (ref_dim_x + blockdim_x - 1) / blockdim_x;
int y_blocks = (ref_dim_y + blockdim_y - 1) / blockdim_y;
int *errorsums = (int*)malloc(x_blocks * y_blocks * 4);
for (int i = 0; i < x_blocks * y_blocks; i++)
{
errorsums[i] = 0;
}
// Diff the two test images to find blocks that differ
for (int y = 0; y < ref_dim_y; y++)
{
for (int x = 0; x < ref_dim_x; x++)
{
int x_block = x / blockdim_x;
int y_block = y / blockdim_y;
int r_gd = data_good[pix(ref_dim_x, y, x, 4, 0)];
int g_gd = data_good[pix(ref_dim_x, y, x, 4, 1)];
int b_gd = data_good[pix(ref_dim_x, y, x, 4, 2)];
int a_gd = data_good[pix(ref_dim_x, y, x, 4, 3)];
int r_bd = data_bad[pix(ref_dim_x, y, x, 4, 0)];
int g_bd = data_bad[pix(ref_dim_x, y, x, 4, 1)];
int b_bd = data_bad[pix(ref_dim_x, y, x, 4, 2)];
int a_bd = data_bad[pix(ref_dim_x, y, x, 4, 3)];
int r_diff = (r_gd - r_bd) * (r_gd - r_bd);
int g_diff = (g_gd - g_bd) * (g_gd - g_bd);
int b_diff = (b_gd - b_bd) * (b_gd - b_bd);
int a_diff = (a_gd - a_bd) * (a_gd - a_bd);
int diff = r_diff + g_diff + b_diff + a_diff;
errorsums[pix(x_blocks, y_block, x_block, 1, 0)] += diff;
}
}
// Diff the two test images to find blocks that differ
float worst_error = 0.0f;
int worst_x_block = 0;
int worst_y_block = 0;
for (int y = 0; y < y_blocks; y++)
{
for (int x = 0; x < x_blocks; x++)
{
float error = errorsums[pix(x_blocks, y, x, 1, 0)];
if (error > worst_error)
{
worst_error = error;
worst_x_block = x;
worst_y_block = y;
}
}
}
if (worst_error == 0.0f)
{
printf("No block errors found\n");
}
else
{
int start_y = worst_y_block * blockdim_y;
int start_x = worst_x_block * blockdim_x;
int end_y = (worst_y_block + 1) * blockdim_y;
int end_x = (worst_x_block + 1) * blockdim_x;
if (end_x > ref_dim_x)
{
end_x = ref_dim_x;
}
if (end_y > ref_dim_y)
{
end_y = ref_dim_y;
}
int outblk_x = end_x - start_x;
int outblk_y = end_y - start_y;
printf("Block errors found at ~(%u, %u) px\n", start_x, start_y);
// Write out the worst bad block (from original reference)
uint8_t* data_out = &(data_ref[pix(ref_dim_x, start_y, start_x, 4, 0)]);
stbi_write_png(argv[5], outblk_x, outblk_y, 4, data_out, 4 * ref_dim_x);
}
free(errorsums);
stbi_image_free(data_ref);
stbi_image_free(data_good);
stbi_image_free(data_bad);
return 0;
}
@@ -0,0 +1,216 @@
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2021 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
// ----------------------------------------------------------------------------
// Overview
// ========
//
// This is a utility tool to automatically generate single tile test vectors
// out of a larger test image. This tool takes three input images:
//
// - the uncompressed reference,
// - the known-good compressed reference,
// - a new compressed image.
//
// The two compressed images are compared block-by-block, and if any block
// differences are found the worst block is extracted from the uncompressed
// reference and written back to disk as a single tile output image.
//
// Limitations
// ===========
//
// This tool currently only supports 2D HDR images.
//
// Build
// =====
//
// g++ astc_test_autoextract_hdr.cpp -I../Source -o astc_test_autoextract_hdr
#include <stdio.h>
#include <stdlib.h>
#define STB_IMAGE_IMPLEMENTATION
#include "ThirdParty/stb_image.h"
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "ThirdParty/stb_image_write.h"
/**
 * @brief Compute the flat array index of one component of one texel in a
 *        tightly packed 2D image.
 *
 * @param x_pix    Number of texels in one row of the image.
 * @param y_idx    Row of the texel.
 * @param x_idx    Column of the texel.
 * @param chans    Number of components stored per texel.
 * @param p_idx    Component index within the texel.
 *
 * @return The index of the requested component in the flat array.
 */
int pix(int x_pix, int y_idx, int x_idx, int chans, int p_idx)
{
    // Index of the texel, counted in whole texels from the start of the image
    int texel_index = y_idx * x_pix + x_idx;

    // Index of the first component of that texel
    int texel_base = texel_index * chans;

    return texel_base + p_idx;
}
/**
 * @brief Tool entry point.
 *
 * Loads three images as floating-point data with 4 components per texel: the
 * uncompressed reference, the known-good compressed image, and the new
 * compressed image. The good and new images are compared texel-by-texel and
 * the squared component differences are accumulated per block. If any block
 * differs, the block with the largest accumulated error is copied out of the
 * uncompressed reference and written to the output path as a Radiance HDR
 * image.
 *
 * @param argc   Number of command line arguments; at least 6 are needed.
 * @param argv   Arguments: <blocksize> <ref> <good> <bad> <out>, where
 *               blocksize has the form WxH, e.g. 8x4.
 *
 * @return 0 on success (including when no differences are found), 1 on any
 *         usage, load, resolution, allocation, or write failure.
 */
int main(int argc, char **argv)
{
    // Parse command line
    if (argc < 6)
    {
        printf("Usage: astc_test_extract <blocksize> <ref> <good> <bad> <out>\n");
        return 1;
    }

    // Block dimensions are used as divisors below, so they must be positive
    int blockdim_x, blockdim_y;
    if (sscanf(argv[1], "%dx%d", &blockdim_x, &blockdim_y) < 2 ||
        blockdim_x <= 0 || blockdim_y <= 0)
    {
        printf("blocksize must be of form WxH; e.g. 8x4\n");
        return 1;
    }

    // Load the original reference image
    int ref_dim_x, ref_dim_y, ref_ncomp;
    float* data_ref = (float*)stbi_loadf(argv[2], &ref_dim_x, &ref_dim_y, &ref_ncomp, 4);
    if (!data_ref)
    {
        printf("Failed to load reference image.\n");
        return 1;
    }

    // Load the good test image
    int good_dim_x, good_dim_y, good_ncomp;
    float* data_good = (float*)stbi_loadf(argv[3], &good_dim_x, &good_dim_y, &good_ncomp, 4);
    if (!data_good)
    {
        printf("Failed to load good test image.\n");
        stbi_image_free(data_ref);
        return 1;
    }

    // Load the bad test image
    int bad_dim_x, bad_dim_y, bad_ncomp;
    float* data_bad = (float*)stbi_loadf(argv[4], &bad_dim_x, &bad_dim_y, &bad_ncomp, 4);
    if (!data_bad)
    {
        printf("Failed to load bad test image.\n");
        stbi_image_free(data_ref);
        stbi_image_free(data_good);
        return 1;
    }

    if (ref_dim_x != good_dim_x || ref_dim_x != bad_dim_x ||
        ref_dim_y != good_dim_y || ref_dim_y != bad_dim_y)
    {
        printf("Failed as images are different resolutions.\n");
        stbi_image_free(data_ref);
        stbi_image_free(data_good);
        stbi_image_free(data_bad);
        return 1;
    }

    // Number of blocks needed to cover the image, rounding up partial blocks
    int x_blocks = (ref_dim_x + blockdim_x - 1) / blockdim_x;
    int y_blocks = (ref_dim_y + blockdim_y - 1) / blockdim_y;

    // One error accumulator per block
    size_t block_count = (size_t)x_blocks * (size_t)y_blocks;
    float* errorsums = (float*)malloc(block_count * sizeof(float));
    if (!errorsums)
    {
        printf("Failed to allocate memory.\n");
        stbi_image_free(data_ref);
        stbi_image_free(data_good);
        stbi_image_free(data_bad);
        return 1;
    }

    for (size_t i = 0; i < block_count; i++)
    {
        errorsums[i] = 0.0f;
    }

    // Diff the two test images to find blocks that differ
    for (int y = 0; y < ref_dim_y; y++)
    {
        for (int x = 0; x < ref_dim_x; x++)
        {
            int x_block = x / blockdim_x;
            int y_block = y / blockdim_y;

            const float* texel_gd = &data_good[pix(ref_dim_x, y, x, 4, 0)];
            const float* texel_bd = &data_bad[pix(ref_dim_x, y, x, 4, 0)];

            // Sum of squared component differences. Squares are already
            // non-negative, so no abs() is needed; calling the integer abs()
            // on these values would truncate sub-1.0 errors to zero.
            float diff = 0.0f;
            for (int c = 0; c < 4; c++)
            {
                float delta = texel_gd[c] - texel_bd[c];
                diff += delta * delta;
            }

            errorsums[pix(x_blocks, y_block, x_block, 1, 0)] += diff;
        }
    }

    // Find the block with the largest accumulated error
    float worst_error = 0.0f;
    int worst_x_block = 0;
    int worst_y_block = 0;
    for (int y = 0; y < y_blocks; y++)
    {
        for (int x = 0; x < x_blocks; x++)
        {
            float error = errorsums[pix(x_blocks, y, x, 1, 0)];
            if (error > worst_error)
            {
                worst_error = error;
                worst_x_block = x;
                worst_y_block = y;
            }
        }
    }

    int status = 0;
    if (worst_error == 0.0f)
    {
        printf("No block errors found\n");
    }
    else
    {
        int start_y = worst_y_block * blockdim_y;
        int start_x = worst_x_block * blockdim_x;

        int end_y = (worst_y_block + 1) * blockdim_y;
        int end_x = (worst_x_block + 1) * blockdim_x;

        // Clamp partial blocks on the right and bottom image edges
        if (end_x > ref_dim_x)
        {
            end_x = ref_dim_x;
        }

        if (end_y > ref_dim_y)
        {
            end_y = ref_dim_y;
        }

        int outblk_x = end_x - start_x;
        int outblk_y = end_y - start_y;

        printf("Block errors found at ~(%u, %u) px\n", start_x, start_y);

        // Copy the block into a tightly packed buffer, as the writer is given
        // no row stride for the source image
        size_t out_count = (size_t)outblk_x * (size_t)outblk_y * 4;
        float* data_out = (float*)malloc(out_count * sizeof(float));
        if (!data_out)
        {
            printf("Failed to allocate memory.\n");
            status = 1;
        }
        else
        {
            for (int y = 0; y < outblk_y; y++)
            {
                for (int x = 0; x < outblk_x; x++)
                {
                    const float* src = &data_ref[pix(ref_dim_x, start_y + y, start_x + x, 4, 0)];
                    float* dst = &data_out[pix(outblk_x, y, x, 4, 0)];
                    for (int c = 0; c < 4; c++)
                    {
                        dst[c] = src[c];
                    }
                }
            }

            // Write out the worst bad block (from original reference)
            if (!stbi_write_hdr(argv[5], outblk_x, outblk_y, 4, data_out))
            {
                printf("Failed to write output image.\n");
                status = 1;
            }

            free(data_out);
        }
    }

    free(errorsums);
    stbi_image_free(data_ref);
    stbi_image_free(data_good);
    stbi_image_free(data_bad);
    return status;
}
@@ -0,0 +1,117 @@
// SPDX-License-Identifier: Apache-2.0
// ----------------------------------------------------------------------------
// Copyright 2023 Arm Limited
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy
// of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations
// under the License.
// ----------------------------------------------------------------------------
// astcenc doesn't use the top 8 integer bits directly for sRGB RGB components
// or when using the decode_unorm8 decode mode. An alternative is used which
// allows a common code path to be used. This test program shows that the two
// produce equivalent output once rounded to a decode_unorm8 output.
// Compile with e.g. clang++ astcenc_u8_test_bench.cpp -o astcenc_u8_test_bench -mavx2 -mf16c
#define ASTCENC_AVX 2
#define ASTCENC_F16C 1
#define ASTCENC_SSE 41
#include "../Source/astcenc_mathlib.cpp"
#include "../Source/astcenc_color_unquantize.cpp"
#include "../Source/astcenc_decompress_symbolic.cpp"
int main()
{
printf("Decode mode test bench\n");
for (int ep0 = 0; ep0 < 256; ep0++)
{
for (int ep1 = 0; ep1 < 256; ep1++)
{
for (int wt1 = 0; wt1 < 65; wt1++)
{
// Validate linear data with decode_unorm8 mode
{
// Expand 8 bit to 16 bit
vint4 weights(wt1);
int ep0_v0 = ep0 * 257;
int ep1_v0 = ep1 * 257;
// Linear with decode_u8 handling
vmask4 decode_u8_v0(true, true, true, true);
vint4 ep0v0(ep0_v0, ep0_v0, ep0_v0, ep0_v0);
vint4 ep1v0(ep1_v0, ep1_v0, ep1_v0, ep1_v0);
// Linear without decode_u8 handling
vmask4 decode_u8_v1(false, false, false, false);
vint4 ep0v1(ep0_v0, ep0_v0, ep0_v0, ep0_v0);
vint4 ep1v1(ep1_v0, ep1_v0, ep1_v0, ep1_v0);
// Lerp both styles
vint4 colorv0 = lerp_color_int(decode_u8_v0, ep0v0, ep1v0, weights);
vint4 colorv1 = lerp_color_int(decode_u8_v1, ep0v1, ep1v1, weights);
// Validate top 8 integer bits match in both cases
// - Shows that astcenc-style U8 doesn't differ from Khronos-style U8
vint4 cs0 = lsr<8>(colorv0);
vint4 cs1 = lsr<8>(colorv1);
assert(cs0.lane<0>() == cs1.lane<0>());
assert(cs0.lane<3>() == cs1.lane<3>());
// Validate that astcenc output matches the top 8 integer bits
vfloat4 colorv0f = decode_texel(colorv0, vmask4(false));
vint4 colorv0_out = float_to_int_rtn(colorv0f * 255.0f);
assert(colorv0_out.lane<0>() == cs0.lane<0>());
}
// Validate sRGB data with decode_unorm8 mode
{
// Expand 8 bit to 16 bit
vint4 weights(wt1);
int ep0_v0s = (ep0 << 8) | 0x80;
int ep1_v0s = (ep1 << 8) | 0x80;
int ep0_v0 = ep0 * 257;
int ep1_v0 = ep1 * 257;
// sRGB RGB and linear A with decode_u8 handling
vmask4 decode_u8_v0(true, true, true, true);
vint4 ep0v0(ep0_v0s, ep0_v0s, ep0_v0s, ep0_v0);
vint4 ep1v0(ep1_v0s, ep1_v0s, ep1_v0s, ep1_v0);
// sRGB RGB and linear A without decode_u8 handling
vmask4 decode_u8_v1(false, false, false, false);
vint4 ep0v1(ep0_v0s, ep0_v0s, ep0_v0s, ep0_v0);
vint4 ep1v1(ep1_v0s, ep1_v0s, ep1_v0s, ep1_v0);
// Lerp both styles
vint4 colorv0 = lerp_color_int(decode_u8_v0, ep0v0, ep1v0, weights);
vint4 colorv1 = lerp_color_int(decode_u8_v1, ep0v1, ep1v1, weights);
// Validate top 8 integer bits match in both cases
// - Shows that astcenc-style U8 doesn't differ from Khronos-style U8
vint4 cs0 = lsr<8>(colorv0);
vint4 cs1 = lsr<8>(colorv1);
assert(cs0.lane<0>() == cs1.lane<0>());
assert(cs0.lane<3>() == cs1.lane<3>());
// Validate that astcenc output matches the top 8 integer bits
vfloat4 colorv0f = decode_texel(colorv0, vmask4(false));
vint4 colorv0_out = float_to_int_rtn(colorv0f * 255.0f);
assert(colorv0_out.lane<0>() == cs0.lane<0>());
}
}
}
}
return 0;
}