#ifndef _TT_STRING32_H_
#define _TT_STRING32_H_

#include <core/cdefs.h>

/****************************************************************************************************************/

#if defined(__cplusplus)
extern "C" {
#endif

/*
 * Out-of-line entry points with C linkage.
 *
 * Their definitions are not part of this header (presumably they live in a separate source file - confirm).
 * Function-like macros with the same names are defined further down in this header, so an ordinary call
 * written after that point expands to the inline versions instead. Writing (memset32)(s, c, n) suppresses
 * the macro expansion and calls the real function.
 *
 * __attribute__((nonnull)) without an argument list marks every pointer parameter as non-null.
 */
void memset32(void *s, int c, size_t n) __attribute__((nonnull));
void memcpy32(void *dest, const void *src, size_t n) __attribute__((nonnull));

#if defined(__cplusplus)
}
#endif

/****************************************************************************************************************
 *
 * Fill memory with a given 32-bit value
 *
 * Optimized version if it is known that all values are 4-byte aligned.
 *
 *   s  Destination; must be 4-byte aligned.
 *   c  Value stored into every 32-bit word (the whole word, not just the low byte as memset() would use).
 *   n  Number of BYTES to fill. Only whole words are written: a trailing partial word is ignored and a
 *      count below 4 writes nothing, which matches what _memset32_const does for small counts.
 *
 */
static __attribute__((always_inline)) inline void _memset32(void *s, int c, size_t n) {

  // Both assembler loops store first and only leave when the running offset/pointer is exactly equal to the
  // end, so a zero count or a count that is not a multiple of 4 would never hit the exit condition and the
  // loop would run on through memory. Reduce the count to whole words and stop if nothing is left.
  n &= ~(size_t)3;
  if (n == 0) return;

#if defined(THUMB)

  {
    unsigned int end;

    // Indexed loop: 'end' is the running byte offset, compared against the byte count.
    // The "memory" clobber tells the compiler that the asm writes memory it cannot see through the operands,
    // so it must not keep stale copies of the destination in registers or move accesses across the fill.
    __asm__ __volatile__(
        "\n"
        "   @ memset32\n"
        "   mov %[end],#0\n"
        "1: str %[value],[%[dst],%[end]]\n"
        "   add %[end],#4\n"
        "   cmp %[end],%[count]\n"
        "   bne  1b\n"

        : [end]   "=&l" (end)

        : [value] "l" (c),
          [dst]   "l" (s),
          [count] "l" (n)

        : "cc", "memory");
  }

#elif defined(__arm__)

  {
    // Address one past the last word to write; the loop post-increments the destination until it gets there.
    unsigned int end = (unsigned int)s + n;

    __asm__ __volatile__(
        "\n"
        "   @ memset32\n"
        "1: str %[value],[%[dst]], #4\n"
        "   cmp %[end],%[dst]\n"
        "   bne  1b\n"

        : [dst]   "=&r" (s)

        :         "0" (s),
          [end]   "r" (end),
          [value] "r" (c)

        : "cc", "memory");
  }

#else

  {
    // Portable fallback for targets without the ARM assembler paths (e.g. host-side builds).
    // The may_alias type lets the word stores legally alias whatever object type the caller passed in.
    typedef unsigned int __attribute__((may_alias)) word_alias;
    word_alias *dst = (word_alias *)s;
    size_t words = n / 4;
    size_t i;

    for (i = 0; i < words; i++) {
      dst[i] = (unsigned int)c;
    }
  }

#endif

}

/****************************************************************************************************************
 *
 * Fill memory with a given 32-bit value
 *
 * Optimized version if it is known that all values are 4-byte aligned and the count is constant.
 *
 *   s  Destination; must be 4-byte aligned.
 *   c  Value stored into every 32-bit word.
 *   n  Number of BYTES to fill. Up to 16 bytes are written as at most four individual word stores (only
 *      whole words, so a count below 4 writes nothing); anything larger is handed to _memset32.
 *
 * The chain of comparisons is meant to be folded away by the compiler when n is a compile-time constant,
 * leaving just the stores that are needed.
 *
 */
static __attribute__((always_inline)) inline void _memset32_const(void *s, int c, size_t n) {

  if (n <= 16) {
    // Store through a may_alias type: the caller's object is usually not declared as unsigned int, and with
    // a plain unsigned int pointer the compiler may assume these stores cannot touch it and reorder accesses.
    typedef unsigned int __attribute__((may_alias)) word_alias;
    word_alias *ptr = (word_alias *)s;

    if (n == 0) return;
    if (n >= 4)  ptr[0] = c;
    if (n >= 8)  ptr[1] = c;
    if (n >= 12) ptr[2] = c;
    if (n >= 16) ptr[3] = c;

    // Compiler barrier: the explicit "memory" clobber is what guarantees the stores above are not moved past
    // this point and that later reads of the buffer are not satisfied from values cached before the fill.
    __asm__ __volatile__ ("" ::: "memory");
    return;
  }

  _memset32(s, c, n);
}

// Dispatch on whether the byte count is a compile-time constant: constant counts go to _memset32_const (which
// turns small fills into individual word stores), everything else goes to the _memset32 loop.
// __builtin_constant_p does not evaluate its argument, so n is evaluated only once, by the selected branch.
#define memset32(s, c, n) (__builtin_constant_p(n) ? _memset32_const(s, c, n) : _memset32(s, c, n))

/****************************************************************************************************************
 *
 * Copy memory around.
 *
 * Optimized version if it is known that all values are 4-byte aligned and do not overlap.
 *
 *   dest  Destination; must be 4-byte aligned.
 *   src   Source; must be 4-byte aligned.
 *   n     Number of BYTES to copy. Only whole words are copied: a trailing partial word is ignored and a
 *         count below 4 copies nothing, which matches what _memcpy32_const does for small counts.
 *
 * Words are copied one at a time from the lowest address upwards.
 *
 */
static __attribute__((always_inline)) inline void _memcpy32(void *dest, const void *src, size_t n) {

  // Both assembler loops copy first and only leave when the running offset/pointer is exactly equal to the
  // end, so a zero count or a count that is not a multiple of 4 would never hit the exit condition and the
  // loop would run on through memory. Reduce the count to whole words and stop if nothing is left.
  n &= ~(size_t)3;
  if (n == 0) return;

#if defined(THUMB)

  {
    unsigned int end, val;

    // Indexed loop: 'end' is the running byte offset applied to both pointers, 'val' the word in flight.
    // The "memory" clobber tells the compiler that the asm reads and writes memory it cannot see through the
    // operands, so pending stores to the source are completed first and the destination is re-read afterwards.
    __asm__ __volatile__(
        "\n"
        "   @ memcpy32\n"
        "   mov %[end],#0\n"
        "1: ldr %[value],[%[src],%[end]]\n"
        "   str %[value],[%[dst],%[end]]\n"
        "   add %[end],#4\n"
        "   cmp %[end],%[count]\n"
        "   bne  1b\n"

        : [dst]   "=&l" (dest),
          [src]   "=&l" (src),
          [value] "=&l" (val),
          [end]   "=&l" (end)

        :         "0" (dest),
                  "1" (src),
          [count] "l" (n)

        : "cc", "memory");
  }

#elif defined(__arm__)

  {
    unsigned int val;
    // Address one past the last destination word; both pointers are post-incremented until dest gets there.
    unsigned int end = (unsigned int)dest + n;

    __asm__ __volatile__(
        "\n"
        "   @ memcpy32\n"
        "1: ldr %[value],[%[src]], #4\n"
        "   str %[value],[%[dst]], #4\n"
        "   cmp %[end],%[dst]\n"
        "   bne  1b\n"

        : [dst]   "=&r" (dest),
          [src]   "=&r" (src),
          [value] "=&r" (val)

        :         "0" (dest),
                  "1" (src),
          [end]   "r" (end)

        : "cc", "memory");
  }

#else

  {
    // Portable fallback for targets without the ARM assembler paths (e.g. host-side builds).
    // The may_alias type lets the word accesses legally alias whatever object types the caller passed in.
    typedef unsigned int __attribute__((may_alias)) word_alias;
    word_alias *out = (word_alias *)dest;
    const word_alias *in = (const word_alias *)src;
    size_t words = n / 4;
    size_t i;

    for (i = 0; i < words; i++) {
      out[i] = in[i];
    }
  }

#endif

}

/****************************************************************************************************************
 *
 * Copy memory
 *
 * Optimized version if it is known that all values are 4-byte aligned and the count is constant.
 *
 *   dest  Destination; must be 4-byte aligned.
 *   src   Source; must be 4-byte aligned.
 *   n     Number of BYTES to copy. Up to 8 bytes are copied as at most two individual word moves (only
 *         whole words, so a count below 4 copies nothing); anything larger is handed to _memcpy32.
 *
 * The chain of comparisons is meant to be folded away by the compiler when n is a compile-time constant,
 * leaving just the moves that are needed.
 *
 */
static __attribute__((always_inline)) inline void _memcpy32_const(void *dest, const void *src, size_t n) {

  if (n <= 8) {
    // Access through a may_alias type: the caller's objects are usually not declared as unsigned int, and
    // with plain unsigned int pointers the compiler may assume these accesses cannot touch them and reorder.
    typedef unsigned int __attribute__((may_alias)) word_alias;
    word_alias *pdst = (word_alias *)dest;
    const word_alias *psrc = (const word_alias *)src;

    if (n == 0) return;
    if (n >= 4) pdst[0] = psrc[0];
    if (n >= 8) pdst[1] = psrc[1];

    // Compiler barrier: the explicit "memory" clobber is what guarantees the moves above are not reordered
    // past this point and that later reads of the destination are not satisfied from stale cached values.
    __asm__ __volatile__ ("" ::: "memory");
    return;
  }

  _memcpy32(dest, src, n);
}

// Dispatch on whether the byte count is a compile-time constant: constant counts go to _memcpy32_const (which
// turns small copies into individual word moves), everything else goes to the _memcpy32 loop.
// __builtin_constant_p does not evaluate its argument, so n is evaluated only once, by the selected branch.
#define memcpy32(d, s, n) (__builtin_constant_p(n) ? _memcpy32_const(d, s, n) : _memcpy32(d, s, n))

/****************************************************************************************************************/
#endif
