Experimental optimization of the bit copy algorithm with special cases

This commit is contained in:
Pavel Kirienko 2015-01-11 04:35:03 +03:00
parent 21c07b90a5
commit 20439bb397
2 changed files with 181 additions and 5 deletions

View File

@ -12,6 +12,8 @@
namespace uavcan
{
#if UAVCAN_TINY
/**
* This function implements fast copy of unaligned bit arrays. It isn't part of the library API, so it is not exported.
* @param src_org Source array
@ -22,6 +24,16 @@ namespace uavcan
*/
void bitarrayCopy(const unsigned char* src_org, unsigned src_offset, unsigned src_len,
unsigned char* dst_org, unsigned dst_offset);
#else
/**
 * Special cases of @ref bitarrayCopy() - either source or destination must be aligned.
 * These functions aren't part of the library API, so they are not exported.
 *
 * Bits are addressed starting from the most significant bit of each byte.
 * Both functions do nothing when @p src_len is zero.
 *
 * bitarrayCopyAlignedToUnaligned() reads @p src_len bits starting at the first bit of @p src_org
 * and writes them into @p dst_org starting at bit offset @p dst_offset.
 *
 * bitarrayCopyUnalignedToAligned() reads @p src_len bits starting at bit offset @p src_offset
 * of @p src_org and writes them into @p dst_org starting at its first bit.
 *
 * @param src_org     Source array
 * @param src_offset  Bit offset of the first source bit, relative to @p src_org (may exceed one byte)
 * @param src_len     Number of bits to copy
 * @param dst_org     Destination array
 * @param dst_offset  Bit offset of the first destination bit, relative to @p dst_org (may exceed one byte)
 */
void bitarrayCopyAlignedToUnaligned(const unsigned char* src_org, unsigned src_len,
unsigned char* dst_org, unsigned dst_offset);
void bitarrayCopyUnalignedToAligned(const unsigned char* src_org, unsigned src_offset, unsigned src_len,
unsigned char* dst_org);
#endif
/**
* This class treats a chunk of memory as an array of bits.
@ -37,6 +49,7 @@ class UAVCAN_EXPORT BitStream
/**
 * Returns the number of whole bytes needed to hold the given number of bits (rounds up).
 */
static inline unsigned bitlenToBytelen(unsigned bits)
{
    const unsigned rounded_up = bits + 7;
    return rounded_up / 8;
}
#if UAVCAN_TINY
static inline void copyBitArrayAlignedToUnaligned(const uint8_t* src_org, unsigned src_len,
uint8_t* dst_org, unsigned dst_offset)
{
@ -50,6 +63,21 @@ class UAVCAN_EXPORT BitStream
bitarrayCopy(reinterpret_cast<const unsigned char*>(src_org), src_offset, src_len,
reinterpret_cast<unsigned char*>(dst_org), 0);
}
#else
/**
 * Forwards to bitarrayCopyAlignedToUnaligned(), converting the uint8_t pointers
 * to the unsigned char pointers that function takes.
 */
static inline void copyBitArrayAlignedToUnaligned(const uint8_t* src_org, unsigned src_len,
                                                  uint8_t* dst_org, unsigned dst_offset)
{
    const unsigned char* const source = reinterpret_cast<const unsigned char*>(src_org);
    unsigned char* const destination = reinterpret_cast<unsigned char*>(dst_org);
    bitarrayCopyAlignedToUnaligned(source, src_len, destination, dst_offset);
}
/**
 * Forwards to bitarrayCopyUnalignedToAligned(), converting the uint8_t pointers
 * to the unsigned char pointers that function takes.
 */
static inline void copyBitArrayUnalignedToAligned(const uint8_t* src_org, unsigned src_offset, unsigned src_len,
                                                  uint8_t* dst_org)
{
    const unsigned char* const source = reinterpret_cast<const unsigned char*>(src_org);
    unsigned char* const destination = reinterpret_cast<unsigned char*>(dst_org);
    bitarrayCopyUnalignedToAligned(source, src_offset, src_len, destination);
}
#endif
public:
static const unsigned MaxBitsPerRW = MaxBytesPerRW * 8;

View File

@ -8,6 +8,13 @@
#include <climits>
#include <cstring>
#include <cstddef>
namespace uavcan
{
/*
 * Bit mask lookup tables shared by the bit copy routines in this file; the index is a bit count in [0, 8].
 *
 * reverse_mask[n], for n in [1, 8], has the n most significant bits set.
 * NOTE: reverse_mask[0] is 0x55, which does not follow that pattern (an empty mask would be 0x00).
 * It looks like a placeholder that is not meant to be used as a mask - TODO confirm.
 */
static const unsigned char reverse_mask[] = { 0x55U, 0x80U, 0xC0U, 0xE0U, 0xF0U, 0xF8U, 0xFCU, 0xFEU, 0xFFU };
/*
 * reverse_mask_xor[n], for n in [0, 8], has the (8 - n) least significant bits set.
 */
static const unsigned char reverse_mask_xor[] = { 0xFFU, 0x7FU, 0x3FU, 0x1FU, 0x0FU, 0x07U, 0x03U, 0x01U, 0x00U };
#if UAVCAN_TINY
#define PREPARE_FIRST_COPY() \
do { \
@ -21,15 +28,10 @@
src_len = 0; \
} } while (0)
namespace uavcan
{
void bitarrayCopy(const unsigned char* src_org, unsigned src_offset, unsigned src_len,
unsigned char* dst_org, unsigned dst_offset)
{
static const unsigned char reverse_mask[] = { 0x55U, 0x80U, 0xC0U, 0xE0U, 0xF0U, 0xF8U, 0xFCU, 0xFEU, 0xFFU };
static const unsigned char reverse_mask_xor[] = { 0xFFU, 0x7FU, 0x3FU, 0x1FU, 0x0FU, 0x07U, 0x03U, 0x01U, 0x00U };
if (src_len > 0U)
{
const unsigned char *src = src_org + (src_offset / CHAR_BIT);
@ -118,4 +120,150 @@ void bitarrayCopy(const unsigned char* src_org, unsigned src_offset, unsigned sr
}
}
#else
/*
* Functions below were manually optimized in the most horrible way.
*/
/**
 * Copies @p src_len bits from the beginning of @p src_org into @p dst_org starting at bit @p dst_offset.
 * Bits are addressed starting from the most significant bit of each byte.
 * Destination bits outside of [dst_offset, dst_offset + src_len) are left unchanged, and no source byte
 * beyond the one holding the last copied bit is read.
 *
 * @param src_org     Source array; bits are taken starting from its first bit
 * @param src_len     Number of bits to copy; nothing is done if zero
 * @param dst_org     Destination array
 * @param dst_offset  Bit offset of the first destination bit, relative to @p dst_org (may exceed one byte)
 */
void bitarrayCopyAlignedToUnaligned(const unsigned char* src_org, unsigned src_len,
                                    unsigned char* dst_org, unsigned dst_offset)
{
    if (src_len > 0U)
    {
        unsigned char* dst = dst_org + (dst_offset / CHAR_BIT);
        const unsigned dst_offset_modulo = dst_offset % CHAR_BIT;
        if (0U == dst_offset_modulo)
        {
            // Both sides are byte aligned: bulk copy whole bytes, then merge the trailing partial byte.
            const unsigned byte_len = src_len / CHAR_BIT;
            const unsigned src_len_modulo = src_len % CHAR_BIT;
            if (byte_len > 0U)
            {
                (void)std::memcpy(dst, src_org, byte_len);
                src_org += byte_len;
                dst += byte_len;
            }
            if (src_len_modulo > 0U)
            {
                *dst &= reverse_mask_xor[src_len_modulo];
                *dst |= reverse_mask[src_len_modulo] & *src_org;
            }
        }
        else
        {
            // Number of bits available in the first (partially occupied) destination byte.
            const unsigned bit_diff_ls = CHAR_BIT - dst_offset_modulo;
            unsigned char c =
                static_cast<unsigned char>(*src_org >> dst_offset_modulo & reverse_mask_xor[dst_offset_modulo]);
            if (src_len >= bit_diff_ls)
            {
                // The first destination byte is filled up to its end: keep only the bits before the offset.
                *dst &= reverse_mask[dst_offset_modulo];
                src_len -= bit_diff_ls;
            }
            else
            {
                // The copy ends inside the first destination byte: keep the bits before the offset and
                // every bit after the copied range. The index must be exactly (offset + length); using
                // (offset + length + 1) would clear one bit that does not belong to the copied range.
                *dst &= reverse_mask[dst_offset_modulo] | reverse_mask_xor[dst_offset_modulo + src_len];
                c &= reverse_mask[dst_offset_modulo + src_len];
                src_len = 0;
            }
            *dst++ |= c;

            // Whole destination bytes: each one is assembled from two adjacent source bytes.
            for (unsigned bytes_left = src_len / CHAR_BIT; bytes_left > 0U; --bytes_left)
            {
                c = static_cast<unsigned char>(*src_org++ << bit_diff_ls);
                c = static_cast<unsigned char>(c | (*src_org >> dst_offset_modulo));
                *dst++ = c;
            }

            // Trailing partial destination byte.
            const unsigned src_len_modulo = src_len % CHAR_BIT;
            if (src_len_modulo > 0U)
            {
                c = static_cast<unsigned char>(*src_org << bit_diff_ls);
                // The current source byte supplies dst_offset_modulo bits; the next one is read only if
                // more bits are needed, so that the source is never read past its last used byte.
                if (src_len_modulo > dst_offset_modulo)
                {
                    c = static_cast<unsigned char>(c | (src_org[1] >> dst_offset_modulo));
                }
                c &= reverse_mask[src_len_modulo];
                *dst &= reverse_mask_xor[src_len_modulo];
                *dst |= c;
            }
        }
    }
}
/**
 * Copies @p src_len bits, starting at bit @p src_offset of @p src_org, to the beginning of @p dst_org.
 * Bits are addressed starting from the most significant bit of each byte.
 * Destination bits beyond the first @p src_len bits are left unchanged, and no source byte beyond the one
 * holding the last copied bit is read.
 *
 * @param src_org     Source array
 * @param src_offset  Bit offset of the first source bit, relative to @p src_org (may exceed one byte)
 * @param src_len     Number of bits to copy; nothing is done if zero
 * @param dst_org     Destination array; bits are written starting from its first bit
 */
void bitarrayCopyUnalignedToAligned(const unsigned char* src_org, unsigned src_offset, unsigned src_len,
                                    unsigned char* dst_org)
{
    if (src_len > 0U)
    {
        const unsigned char* src = src_org + (src_offset / CHAR_BIT);
        const unsigned src_offset_modulo = src_offset % CHAR_BIT;
        if (src_offset_modulo == 0U)
        {
            // Both sides are byte aligned: bulk copy whole bytes, then merge the trailing partial byte.
            const unsigned byte_len = src_len / CHAR_BIT;
            const unsigned src_len_modulo = src_len % CHAR_BIT;
            if (byte_len > 0U)
            {
                (void)std::memcpy(dst_org, src, byte_len);
                src += byte_len;
                dst_org += byte_len;
            }
            if (src_len_modulo > 0U)
            {
                *dst_org &= reverse_mask_xor[src_len_modulo];
                *dst_org |= reverse_mask[src_len_modulo] & *src;
            }
        }
        else
        {
            // Number of bits that the current source byte can still supply.
            const unsigned bit_diff_rs = CHAR_BIT - src_offset_modulo;
            unsigned char c = static_cast<unsigned char>(*src++ << src_offset_modulo);
            // The next source byte is read only if the copy actually extends into it.
            if (src_len > bit_diff_rs)
            {
                c = static_cast<unsigned char>(c | (*src >> bit_diff_rs));
            }
            if (src_len >= CHAR_BIT)
            {
                // The first destination byte is overwritten entirely, so none of its old bits are kept.
                *dst_org = 0U;
                src_len -= CHAR_BIT;
            }
            else
            {
                // The copy ends inside the first destination byte: keep only the bits after the copied range.
                *dst_org &= reverse_mask_xor[src_len];
                c &= reverse_mask[src_len];
                src_len = 0;
            }
            *dst_org++ |= c;

            // Whole destination bytes: each one is assembled from two adjacent source bytes.
            for (unsigned bytes_left = src_len / CHAR_BIT; bytes_left > 0U; --bytes_left)
            {
                c = static_cast<unsigned char>(*src++ << src_offset_modulo);
                c = static_cast<unsigned char>(c | (*src >> bit_diff_rs));
                *dst_org++ = c;
            }

            // Trailing partial destination byte.
            const unsigned src_len_modulo = src_len % CHAR_BIT;
            if (src_len_modulo > 0U)
            {
                c = static_cast<unsigned char>(*src << src_offset_modulo);
                // The current source byte supplies bit_diff_rs bits; the next one is read only if
                // more bits are needed, so that the source is never read past its last used byte.
                if (src_len_modulo > bit_diff_rs)
                {
                    c = static_cast<unsigned char>(c | (src[1] >> bit_diff_rs));
                }
                c &= reverse_mask[src_len_modulo];
                *dst_org &= reverse_mask_xor[src_len_modulo];
                *dst_org |= c;
            }
        }
    }
}
#endif
}