This repository has been archived on 2024-01-26. You can view files and clone it, but cannot push or open issues or pull requests.
rgb_led_matrix/Adafruit_NeoPixel.cpp

926 lines
34 KiB
C++
Raw Normal View History

/*-------------------------------------------------------------------------
Arduino library to control a wide variety of WS2811- and WS2812-based RGB
LED devices such as Adafruit FLORA RGB Smart Pixels and NeoPixel strips.
Currently handles 400 and 800 KHz bitstreams on 8, 12 and 16 MHz ATmega
MCUs, with LEDs wired for RGB or GRB color order. 8 MHz MCUs provide
output on PORTB and PORTD, while 16 MHz chips can handle most output pins
(possible exception with upper PORT registers on the Arduino Mega).
Written by Phil Burgess / Paint Your Dragon for Adafruit Industries,
contributions by PJRC and other members of the open source community.
Adafruit invests time and resources providing this open source code,
please support Adafruit and open-source hardware by purchasing products
from Adafruit!
-------------------------------------------------------------------------
This file is part of the Adafruit NeoPixel library.
NeoPixel is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as
published by the Free Software Foundation, either version 3 of
the License, or (at your option) any later version.
NeoPixel is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with NeoPixel. If not, see
<http://www.gnu.org/licenses/>.
-------------------------------------------------------------------------*/
#include "Adafruit_NeoPixel.h"
// Constructor. Records the strip length (n), output pin (p) and pixel type
// flags (t), then allocates and zeroes a color buffer of three bytes per
// pixel. On AVR the PORT register and bitmask for the pin are also looked
// up once here. If the allocation fails, 'pixels' is left NULL.
Adafruit_NeoPixel::Adafruit_NeoPixel(uint16_t n, uint8_t p, uint8_t t) : numLEDs(n), numBytes(n * 3), pin(p), type(t), pixels(NULL)
#ifdef __AVR__
  ,port(portOutputRegister(digitalPinToPort(p))),
  pinMask(digitalPinToBitMask(p))
#endif
{
  // Both members are read before anything else writes them (brightness by
  // setPixelColor()/setBrightness(), endTime by show()), so give them
  // defined starting values instead of relying on static zero-init.
  brightness = 0; // 0 = colors stored literally, no scaling
  endTime    = 0;

  if((pixels = (uint8_t *)malloc(numBytes))) {
    memset(pixels, 0, numBytes); // All pixels start 'off'
  }
}
#ifdef __MK20DX128__ // Teensy 3.0
// Busy-wait helper for the Teensy 3.0 bit-bang loops: spins in a two
// instruction loop, decrementing 'num' until it reaches zero. The counter
// is decremented before it is tested, so 'num' must be at least 1 (a value
// of 0 would wrap around and spin for a very long time).
static inline void delayShort(uint32_t) __attribute__((always_inline, unused));
static inline void delayShort(uint32_t num) {
  asm volatile(
    "L_%=_delay:" "\n\t"
    "subs %0, #1" "\n\t"
    "bne L_%=_delay" "\n"
    : "+r" (num) // counter is both read and written
    :            // no input-only operands
    : "cc"       // SUBS updates the condition flags
  );
}
#endif // __MK20DX128__
// Prepare the data pin for use: configure it as an output and drive it low.
void Adafruit_NeoPixel::begin(void) {
pinMode(pin, OUTPUT);
digitalWrite(pin, LOW);
}
// Transmit the contents of the pixel buffer out the data pin.
// Returns immediately if the buffer was never allocated. Otherwise it waits
// out any remaining latch interval from the previous call, disables
// interrupts, issues every byte of 'pixels' using the timing loop selected
// by the compile-time architecture / F_CPU and by the speed flag in 'type',
// re-enables interrupts, and finally records the completion time in endTime.
void Adafruit_NeoPixel::show(void) {
// Nothing to send if the constructor's allocation failed.
if(!pixels) return;
// Data latch = 50+ microsecond pause in the output stream. Rather than
// put a delay at the end of the function, the ending time is noted and
// the function will simply hold off (if needed) on issuing the
// subsequent round of data until the latch time has elapsed. This
// allows the mainline code to start generating the next frame of data
// rather than stalling for the latch.
while((micros() - endTime) < 50L);
// endTime is a private member (rather than global var) so that multiple
// instances on different pins can be quickly issued in succession (each
// instance doesn't delay the next).
// In order to make this code runtime-configurable to work with any pin,
// SBI/CBI instructions are eschewed in favor of full PORT writes via the
// OUT or ST instructions. It relies on two facts: that peripheral
// functions (such as PWM) take precedence on output pins, so our PORT-
// wide writes won't interfere, and that interrupts are globally disabled
// while data is being issued to the LEDs, so no other code will be
// accessing the PORT. The code takes an initial 'snapshot' of the PORT
// state, computes 'pin high' and 'pin low' values, and writes these back
// to the PORT register as needed.
noInterrupts(); // Need 100% focus on instruction timing
#ifdef __AVR__
volatile uint16_t
i = numBytes; // Loop counter
volatile uint8_t
*ptr = pixels, // Pointer to next byte
b = *ptr++, // Current byte value
hi, // PORT w/output bit set high
lo; // PORT w/output bit set low
// Hand-tuned assembly code issues data to the LED drivers at a specific
// rate. There's separate code for different CPU speeds (8, 12, 16 MHz)
// for both the WS2811 (400 KHz) and WS2812 (800 KHz) drivers. The
// datastream timing for the LED drivers allows a little wiggle room each
// way (listed in the datasheets), so the conditions for compiling each
// case are set up for a range of frequencies rather than just the exact
// 8, 12 or 16 MHz values, permitting use with some close-but-not-spot-on
// devices (e.g. 16.5 MHz DigiSpark). The ranges were arrived at based
// on the datasheet figures and have not been extensively tested outside
// the canonical 8/12/16 MHz speeds; there's no guarantee these will work
// close to the extremes (or possibly they could be pushed further).
// Keep in mind only one CPU speed case actually gets compiled; the
// resulting program isn't as massive as it might look from source here.
// NOTE(review): every AVR asm statement below lists its operands after
// '::', i.e. as input-only operands, yet the instructions write to several
// of them (the current byte, the next-value registers, the loop counter
// and the data pointer). Confirm the toolchain in use tolerates this;
// declaring those operands as read-write outputs would be the stricter form.
// 8 MHz(ish) AVR ---------------------------------------------------------
#if (F_CPU >= 7400000UL) && (F_CPU <= 9500000UL)
if((type & NEO_SPDMASK) == NEO_KHZ800) { // 800 KHz bitstream
volatile uint8_t n1, n2 = 0; // First, next bits out
// Squeezing an 800 KHz stream out of an 8 MHz chip requires code
// specific to each PORT register. At present this is only written
// to work with pins on PORTD or PORTB, the most likely use case --
// this covers all the pins on the Adafruit Flora and the bulk of
// digital pins on the Arduino Pro 8 MHz (keep in mind, this code
// doesn't even get compiled for 16 MHz boards like the Uno, Mega,
// Leonardo, etc., so don't bother extending this out of hand).
// Additional PORTs could be added if you really need them, just
// duplicate the else and loop and change the PORT. Each add'l
// PORT will require about 150(ish) bytes of program space.
// 10 instruction clocks per bit: HHxxxxxLLL
// OUT instructions: ^ ^ ^ (T=0,2,7)
#ifdef PORTD // PORTD isn't present on ATtiny85, etc.
if(port == &PORTD) {
hi = PORTD | pinMask;
lo = PORTD & ~pinMask;
n1 = lo;
if(b & 0x80) n1 = hi;
// Dirty trick: RJMPs proceeding to the next instruction are used
// to delay two clock cycles in one instruction word (rather than
// using two NOPs). This was necessary in order to squeeze the
// loop down to exactly 64 words -- the maximum possible for a
// relative branch.
asm volatile(
"headD:\n\t" // Clk Pseudocode
// Bit 7:
"out %0, %1\n\t" // 1 PORT = hi
"mov %3, %4\n\t" // 1 n2 = lo
"out %0, %2\n\t" // 1 PORT = n1
"rjmp .+0\n\t" // 2 nop nop
"sbrc %5, 6\n\t" // 1-2 if(b & 0x40)
"mov %3, %1\n\t" // 0-1 n2 = hi
"out %0, %4\n\t" // 1 PORT = lo
"rjmp .+0\n\t" // 2 nop nop
// Bit 6:
"out %0, %1\n\t" // 1 PORT = hi
"mov %2, %4\n\t" // 1 n1 = lo
"out %0, %3\n\t" // 1 PORT = n2
"rjmp .+0\n\t" // 2 nop nop
"sbrc %5, 5\n\t" // 1-2 if(b & 0x20)
"mov %2, %1\n\t" // 0-1 n1 = hi
"out %0, %4\n\t" // 1 PORT = lo
"rjmp .+0\n\t" // 2 nop nop
// Bit 5:
"out %0, %1\n\t" // 1 PORT = hi
"mov %3, %4\n\t" // 1 n2 = lo
"out %0, %2\n\t" // 1 PORT = n1
"rjmp .+0\n\t" // 2 nop nop
"sbrc %5, 4\n\t" // 1-2 if(b & 0x10)
"mov %3, %1\n\t" // 0-1 n2 = hi
"out %0, %4\n\t" // 1 PORT = lo
"rjmp .+0\n\t" // 2 nop nop
// Bit 4:
"out %0, %1\n\t" // 1 PORT = hi
"mov %2, %4\n\t" // 1 n1 = lo
"out %0, %3\n\t" // 1 PORT = n2
"rjmp .+0\n\t" // 2 nop nop
"sbrc %5, 3\n\t" // 1-2 if(b & 0x08)
"mov %2, %1\n\t" // 0-1 n1 = hi
"out %0, %4\n\t" // 1 PORT = lo
"rjmp .+0\n\t" // 2 nop nop
// Bit 3:
"out %0, %1\n\t" // 1 PORT = hi
"mov %3, %4\n\t" // 1 n2 = lo
"out %0, %2\n\t" // 1 PORT = n1
"rjmp .+0\n\t" // 2 nop nop
"sbrc %5, 2\n\t" // 1-2 if(b & 0x04)
"mov %3, %1\n\t" // 0-1 n2 = hi
"out %0, %4\n\t" // 1 PORT = lo
"rjmp .+0\n\t" // 2 nop nop
// Bit 2:
"out %0, %1\n\t" // 1 PORT = hi
"mov %2, %4\n\t" // 1 n1 = lo
"out %0, %3\n\t" // 1 PORT = n2
"rjmp .+0\n\t" // 2 nop nop
"sbrc %5, 1\n\t" // 1-2 if(b & 0x02)
"mov %2, %1\n\t" // 0-1 n1 = hi
"out %0, %4\n\t" // 1 PORT = lo
"rjmp .+0\n\t" // 2 nop nop
// Bit 1:
"out %0, %1\n\t" // 1 PORT = hi
"mov %3, %4\n\t" // 1 n2 = lo
"out %0, %2\n\t" // 1 PORT = n1
"rjmp .+0\n\t" // 2 nop nop
"sbrc %5, 0\n\t" // 1-2 if(b & 0x01)
"mov %3, %1\n\t" // 0-1 n2 = hi
"out %0, %4\n\t" // 1 PORT = lo
"sbiw %6, 1\n\t" // 2 i-- (dec. but don't act on zero flag yet)
// Bit 0:
"out %0, %1\n\t" // 1 PORT = hi
"mov %2, %4\n\t" // 1 n1 = lo
"out %0, %3\n\t" // 1 PORT = n2
"ld %5, %a7+\n\t" // 2 b = *ptr++
"sbrc %5, 7\n\t" // 1-2 if(b & 0x80)
"mov %2, %1\n\t" // 0-1 n1 = hi
"out %0, %4\n\t" // 1 PORT = lo
"brne headD\n" // 2 while(i) (zero flag determined above)
::
"I" (_SFR_IO_ADDR(PORTD)), // %0
"r" (hi), // %1
"r" (n1), // %2
"r" (n2), // %3
"r" (lo), // %4
"r" (b), // %5
"w" (i), // %6
"e" (ptr) // %a7
); // end asm
} else if(port == &PORTB) {
#endif // PORTD
// Same as above, just switched to PORTB and stripped of comments.
hi = PORTB | pinMask;
lo = PORTB & ~pinMask;
n1 = lo;
if(b & 0x80) n1 = hi;
asm volatile(
"headB:\n\t"
"out %0, %1\n\t"
"mov %3, %4\n\t"
"out %0, %2\n\t"
"rjmp .+0\n\t"
"sbrc %5, 6\n\t"
"mov %3, %1\n\t"
"out %0, %4\n\t"
"rjmp .+0\n\t"
"out %0, %1\n\t"
"mov %2, %4\n\t"
"out %0, %3\n\t"
"rjmp .+0\n\t"
"sbrc %5, 5\n\t"
"mov %2, %1\n\t"
"out %0, %4\n\t"
"rjmp .+0\n\t"
"out %0, %1\n\t"
"mov %3, %4\n\t"
"out %0, %2\n\t"
"rjmp .+0\n\t"
"sbrc %5, 4\n\t"
"mov %3, %1\n\t"
"out %0, %4\n\t"
"rjmp .+0\n\t"
"out %0, %1\n\t"
"mov %2, %4\n\t"
"out %0, %3\n\t"
"rjmp .+0\n\t"
"sbrc %5, 3\n\t"
"mov %2, %1\n\t"
"out %0, %4\n\t"
"rjmp .+0\n\t"
"out %0, %1\n\t"
"mov %3, %4\n\t"
"out %0, %2\n\t"
"rjmp .+0\n\t"
"sbrc %5, 2\n\t"
"mov %3, %1\n\t"
"out %0, %4\n\t"
"rjmp .+0\n\t"
"out %0, %1\n\t"
"mov %2, %4\n\t"
"out %0, %3\n\t"
"rjmp .+0\n\t"
"sbrc %5, 1\n\t"
"mov %2, %1\n\t"
"out %0, %4\n\t"
"rjmp .+0\n\t"
"out %0, %1\n\t"
"mov %3, %4\n\t"
"out %0, %2\n\t"
"rjmp .+0\n\t"
"sbrc %5, 0\n\t"
"mov %3, %1\n\t"
"out %0, %4\n\t"
"sbiw %6, 1\n\t"
"out %0, %1\n\t"
"mov %2, %4\n\t"
"out %0, %3\n\t"
"ld %5, %a7+\n\t"
"sbrc %5, 7\n\t"
"mov %2, %1\n\t"
"out %0, %4\n\t"
"brne headB\n" :: "I" (_SFR_IO_ADDR(PORTB)), "r" (hi),
"r" (n1), "r" (n2), "r" (lo), "r" (b), "w" (i), "e" (ptr));
#ifdef PORTD
} // endif PORTB
#endif
} else { // end 800 KHz, do 400 KHz
// Timing is more relaxed; unrolling the inner loop for each bit is
// not necessary. Still using the peculiar RJMPs as 2X NOPs, not out
// of need but just to trim the code size down a little.
// This 400-KHz-datastream-on-8-MHz-CPU code is not quite identical
// to the 800-on-16 code later -- the hi/lo timing between WS2811 and
// WS2812 is not simply a 2:1 scale!
// 20 inst. clocks per bit: HHHHxxxxxxLLLLLLLLLL
// ST instructions: ^ ^ ^ (T=0,4,10)
volatile uint8_t next, bit;
hi = *port | pinMask;
lo = *port & ~pinMask;
next = lo;
bit = 8;
asm volatile(
"head20:\n\t" // Clk Pseudocode (T = 0)
"st %a0, %1\n\t" // 2 PORT = hi (T = 2)
"sbrc %2, 7\n\t" // 1-2 if(b & 128)
"mov %4, %1\n\t" // 0-1 next = hi (T = 4)
"st %a0, %4\n\t" // 2 PORT = next (T = 6)
"mov %4, %5\n\t" // 1 next = lo (T = 7)
"dec %3\n\t" // 1 bit-- (T = 8)
"breq nextbyte20\n\t" // 1-2 if(bit == 0)
"rol %2\n\t" // 1 b <<= 1 (T = 10)
"st %a0, %5\n\t" // 2 PORT = lo (T = 12)
"rjmp .+0\n\t" // 2 nop nop (T = 14)
"rjmp .+0\n\t" // 2 nop nop (T = 16)
"rjmp .+0\n\t" // 2 nop nop (T = 18)
"rjmp head20\n\t" // 2 -> head20 (next bit out)
"nextbyte20:\n\t" // (T = 10)
"st %a0, %5\n\t" // 2 PORT = lo (T = 12)
"nop\n\t" // 1 nop (T = 13)
"ldi %3, 8\n\t" // 1 bit = 8 (T = 14)
"ld %2, %a6+\n\t" // 2 b = *ptr++ (T = 16)
"sbiw %7, 1\n\t" // 2 i-- (T = 18)
"brne head20\n\t" // 2 if(i != 0) -> head20 (next byte)
::
"e" (port), // %a0
"r" (hi), // %1
"r" (b), // %2
"r" (bit), // %3
"r" (next), // %4
"r" (lo), // %5
"e" (ptr), // %a6
"w" (i) // %7
); // end asm
}
// 12 MHz(ish) AVR --------------------------------------------------------
#elif (F_CPU >= 11100000UL) && (F_CPU <= 14300000UL)
if((type & NEO_SPDMASK) == NEO_KHZ800) { // 800 KHz bitstream
// In the 12 MHz case, an optimized 800 KHz datastream (no dead time
// between bytes) requires a PORT-specific loop similar to the 8 MHz
// code (but a little more relaxed in this case).
// 15 instruction clocks per bit: HHHHxxxxxxLLLLL
// OUT instructions: ^ ^ ^ (T=0,4,10)
volatile uint8_t next;
#ifdef PORTD
if(port == &PORTD) {
hi = PORTD | pinMask;
lo = PORTD & ~pinMask;
next = lo;
if(b & 0x80) next = hi;
// Don't "optimize" the OUT calls into the bitTime subroutine;
// we're exploiting the RCALL and RET as 3- and 4-cycle NOPs!
asm volatile(
"headD:\n\t" // (T = 0)
"out %0, %1\n\t" // (T = 1)
"rcall bitTimeD\n\t" // Bit 7 (T = 15)
"out %0, %1\n\t"
"rcall bitTimeD\n\t" // Bit 6
"out %0, %1\n\t"
"rcall bitTimeD\n\t" // Bit 5
"out %0, %1\n\t"
"rcall bitTimeD\n\t" // Bit 4
"out %0, %1\n\t"
"rcall bitTimeD\n\t" // Bit 3
"out %0, %1\n\t"
"rcall bitTimeD\n\t" // Bit 2
"out %0, %1\n\t"
"rcall bitTimeD\n\t" // Bit 1
// Bit 0:
"out %0, %1\n\t" // 1 PORT = hi (T = 1)
"rjmp .+0\n\t" // 2 nop nop (T = 3)
"ld %4, %a5+\n\t" // 2 b = *ptr++ (T = 5)
"out %0, %2\n\t" // 1 PORT = next (T = 6)
"mov %2, %3\n\t" // 1 next = lo (T = 7)
"sbrc %4, 7\n\t" // 1-2 if(b & 0x80) (T = 8)
"mov %2, %1\n\t" // 0-1 next = hi (T = 9)
"nop\n\t" // 1 (T = 10)
"out %0, %3\n\t" // 1 PORT = lo (T = 11)
"sbiw %6, 1\n\t" // 2 i-- (T = 13)
"brne headD\n\t" // 2 if(i != 0) -> headD (next byte)
"rjmp doneD\n\t"
"bitTimeD:\n\t" // nop nop nop (T = 4)
"out %0, %2\n\t" // 1 PORT = next (T = 5)
"mov %2, %3\n\t" // 1 next = lo (T = 6)
"rol %4\n\t" // 1 b <<= 1 (T = 7)
"sbrc %4, 7\n\t" // 1-2 if(b & 0x80) (T = 8)
"mov %2, %1\n\t" // 0-1 next = hi (T = 9)
"nop\n\t" // 1 (T = 10)
"out %0, %3\n\t" // 1 PORT = lo (T = 11)
"ret\n\t" // 4 nop nop nop nop (T = 15)
"doneD:\n\t"
::
"I" (_SFR_IO_ADDR(PORTD)), // %0
"r" (hi), // %1
"r" (next), // %2
"r" (lo), // %3
"r" (b), // %4
"e" (ptr), // %a5
"w" (i) // %6
); // end asm
} else if(port == &PORTB) {
#endif // PORTD
hi = PORTB | pinMask;
lo = PORTB & ~pinMask;
next = lo;
if(b & 0x80) next = hi;
// Same as above, just set for PORTB & stripped of comments
asm volatile(
"headB:\n\t"
"out %0, %1\n\t"
"rcall bitTimeB\n\t"
"out %0, %1\n\t"
"rcall bitTimeB\n\t"
"out %0, %1\n\t"
"rcall bitTimeB\n\t"
"out %0, %1\n\t"
"rcall bitTimeB\n\t"
"out %0, %1\n\t"
"rcall bitTimeB\n\t"
"out %0, %1\n\t"
"rcall bitTimeB\n\t"
"out %0, %1\n\t"
"rcall bitTimeB\n\t"
"out %0, %1\n\t"
"rjmp .+0\n\t"
"ld %4, %a5+\n\t"
"out %0, %2\n\t"
"mov %2, %3\n\t"
"sbrc %4, 7\n\t"
"mov %2, %1\n\t"
"nop\n\t"
"out %0, %3\n\t"
"sbiw %6, 1\n\t"
"brne headB\n\t"
"rjmp doneB\n\t"
"bitTimeB:\n\t"
"out %0, %2\n\t"
"mov %2, %3\n\t"
"rol %4\n\t"
"sbrc %4, 7\n\t"
"mov %2, %1\n\t"
"nop\n\t"
"out %0, %3\n\t"
"ret\n\t"
"doneB:\n\t" :: "I" (_SFR_IO_ADDR(PORTB)), "r" (hi), "r" (next),
"r" (lo), "r" (b), "e" (ptr), "w" (i));
#ifdef PORTD
}
#endif
} else { // 400 KHz
// 30 instruction clocks per bit: HHHHHHxxxxxxxxxLLLLLLLLLLLLLLL
// ST instructions: ^ ^ ^ (T=0,6,15)
volatile uint8_t next, bit;
hi = *port | pinMask;
lo = *port & ~pinMask;
next = lo;
bit = 8;
asm volatile(
"head30:\n\t" // Clk Pseudocode (T = 0)
"st %a0, %1\n\t" // 2 PORT = hi (T = 2)
"sbrc %2, 7\n\t" // 1-2 if(b & 128)
"mov %4, %1\n\t" // 0-1 next = hi (T = 4)
"rjmp .+0\n\t" // 2 nop nop (T = 6)
"st %a0, %4\n\t" // 2 PORT = next (T = 8)
"rjmp .+0\n\t" // 2 nop nop (T = 10)
"rjmp .+0\n\t" // 2 nop nop (T = 12)
"rjmp .+0\n\t" // 2 nop nop (T = 14)
"nop\n\t" // 1 nop (T = 15)
"st %a0, %5\n\t" // 2 PORT = lo (T = 17)
"rjmp .+0\n\t" // 2 nop nop (T = 19)
"dec %3\n\t" // 1 bit-- (T = 20)
"breq nextbyte30\n\t" // 1-2 if(bit == 0)
"rol %2\n\t" // 1 b <<= 1 (T = 22)
"rjmp .+0\n\t" // 2 nop nop (T = 24)
"rjmp .+0\n\t" // 2 nop nop (T = 26)
"rjmp .+0\n\t" // 2 nop nop (T = 28)
"rjmp head30\n\t" // 2 -> head30 (next bit out)
"nextbyte30:\n\t" // (T = 22)
"nop\n\t" // 1 nop (T = 23)
"ldi %3, 8\n\t" // 1 bit = 8 (T = 24)
"ld %2, %a6+\n\t" // 2 b = *ptr++ (T = 26)
"sbiw %7, 1\n\t" // 2 i-- (T = 28)
"brne head30\n\t" // 1-2 if(i != 0) -> head30 (next byte)
::
"e" (port), // %a0
"r" (hi), // %1
"r" (b), // %2
"r" (bit), // %3
"r" (next), // %4
"r" (lo), // %5
"e" (ptr), // %a6
"w" (i) // %7
); // end asm
}
// 16 MHz(ish) AVR --------------------------------------------------------
#elif (F_CPU >= 15400000UL) && (F_CPU <= 19000000L)
if((type & NEO_SPDMASK) == NEO_KHZ800) { // 800 KHz bitstream
// WS2811 and WS2812 have different hi/lo duty cycles; this is
// similar but NOT an exact copy of the prior 400-on-8 code.
// 20 inst. clocks per bit: HHHHHxxxxxxxxLLLLLLL
// ST instructions: ^ ^ ^ (T=0,5,13)
volatile uint8_t next, bit;
hi = *port | pinMask;
lo = *port & ~pinMask;
next = lo;
bit = 8;
asm volatile(
"head20:\n\t" // Clk Pseudocode (T = 0)
"st %a0, %1\n\t" // 2 PORT = hi (T = 2)
"sbrc %2, 7\n\t" // 1-2 if(b & 128)
"mov %4, %1\n\t" // 0-1 next = hi (T = 4)
"dec %3\n\t" // 1 bit-- (T = 5)
"st %a0, %4\n\t" // 2 PORT = next (T = 7) ST and MOV don't
"mov %4, %5\n\t" // 1 next = lo (T = 8) change Z flag,
"breq nextbyte20\n\t" // 1-2 if(bit == 0) <-- so this is OK.
"rol %2\n\t" // 1 b <<= 1 (T = 10)
"rjmp .+0\n\t" // 2 nop nop (T = 12)
"nop\n\t" // 1 nop (T = 13)
"st %a0, %5\n\t" // 2 PORT = lo (T = 15)
"nop\n\t" // 1 nop (T = 16)
"rjmp .+0\n\t" // 2 nop nop (T = 18)
"rjmp head20\n\t" // 2 -> head20 (next bit out)
"nextbyte20:\n\t" // (T = 10)
"ldi %3, 8\n\t" // 1 bit = 8 (T = 11)
"ld %2, %a6+\n\t" // 2 b = *ptr++ (T = 13)
"st %a0, %5\n\t" // 2 PORT = lo (T = 15)
"nop\n\t" // 1 nop (T = 16)
"sbiw %7, 1\n\t" // 2 i-- (T = 18)
"brne head20\n\t" // 2 if(i != 0) -> head20 (next byte)
::
"e" (port), // %a0
"r" (hi), // %1
"r" (b), // %2
"r" (bit), // %3
"r" (next), // %4
"r" (lo), // %5
"e" (ptr), // %a6
"w" (i) // %7
); // end asm
} else { // 400 KHz
// The 400 KHz clock on 16 MHz MCU is the most 'relaxed' version.
// 40 inst. clocks per bit: HHHHHHHHxxxxxxxxxxxxLLLLLLLLLLLLLLLLLLLL
// ST instructions: ^ ^ ^ (T=0,8,20)
volatile uint8_t next, bit;
hi = *port | pinMask;
lo = *port & ~pinMask;
next = lo;
bit = 8;
asm volatile(
"head40:\n\t" // Clk Pseudocode (T = 0)
"st %a0, %1\n\t" // 2 PORT = hi (T = 2)
"sbrc %2, 7\n\t" // 1-2 if(b & 128)
"mov %4, %1\n\t" // 0-1 next = hi (T = 4)
"rjmp .+0\n\t" // 2 nop nop (T = 6)
"rjmp .+0\n\t" // 2 nop nop (T = 8)
"st %a0, %4\n\t" // 2 PORT = next (T = 10)
"rjmp .+0\n\t" // 2 nop nop (T = 12)
"rjmp .+0\n\t" // 2 nop nop (T = 14)
"rjmp .+0\n\t" // 2 nop nop (T = 16)
"rjmp .+0\n\t" // 2 nop nop (T = 18)
"rjmp .+0\n\t" // 2 nop nop (T = 20)
"st %a0, %5\n\t" // 2 PORT = lo (T = 22)
"nop\n\t" // 1 nop (T = 23)
"mov %4, %5\n\t" // 1 next = lo (T = 24)
"dec %3\n\t" // 1 bit-- (T = 25)
"breq nextbyte40\n\t" // 1-2 if(bit == 0)
"rol %2\n\t" // 1 b <<= 1 (T = 27)
"nop\n\t" // 1 nop (T = 28)
"rjmp .+0\n\t" // 2 nop nop (T = 30)
"rjmp .+0\n\t" // 2 nop nop (T = 32)
"rjmp .+0\n\t" // 2 nop nop (T = 34)
"rjmp .+0\n\t" // 2 nop nop (T = 36)
"rjmp .+0\n\t" // 2 nop nop (T = 38)
"rjmp head40\n\t" // 2 -> head40 (next bit out)
"nextbyte40:\n\t" // (T = 27)
"ldi %3, 8\n\t" // 1 bit = 8 (T = 28)
"ld %2, %a6+\n\t" // 2 b = *ptr++ (T = 30)
"rjmp .+0\n\t" // 2 nop nop (T = 32)
"st %a0, %5\n\t" // 2 PORT = lo (T = 34)
"rjmp .+0\n\t" // 2 nop nop (T = 36)
"sbiw %7, 1\n\t" // 2 i-- (T = 38)
"brne head40\n\t" // 1-2 if(i != 0) -> head40 (next byte)
::
"e" (port), // %a0
"r" (hi), // %1
"r" (b), // %2
"r" (bit), // %3
"r" (next), // %4
"r" (lo), // %5
"e" (ptr), // %a6
"w" (i) // %7
); // end asm
}
#else
#error "CPU SPEED NOT SUPPORTED"
#endif
#elif defined(__arm__)
// Paul Stoffregen: "This implementation may not be quite perfect, but
// it seems to work reasonably well with an actual 20 LED WS2811 strip.
// The timing at 48 MHz is off a bit, perhaps due to flash cache misses?
// Ideally this code should execute from RAM to eliminate slight timing
// differences between flash caches hits and misses. But it seems to
// [run] quite well. More testing is needed with longer strips."
/* If timing can be stabilized, something like this should work:
#define DELAY_800_T0H (0.40 * F_CPU / 1000000L / DCYC + 0.5)
#define DELAY_800_T0L (0.85 * F_CPU / 1000000L / DCYC + 0.5)
#define DELAY_800_T1H (0.80 * F_CPU / 1000000L / DCYC + 0.5)
#define DELAY_800_T1L (0.45 * F_CPU / 1000000L / DCYC + 0.5)
#define DELAY_400_T0H (0.50 * F_CPU / 1000000L / DCYC + 0.5)
#define DELAY_400_T0L (2.00 * F_CPU / 1000000L / DCYC + 0.5)
#define DELAY_400_T1H (1.20 * F_CPU / 1000000L / DCYC + 0.5)
#define DELAY_400_T1L (1.30 * F_CPU / 1000000L / DCYC + 0.5)
But in the meantime, a fixed set of tables is used:
*/
#ifdef __MK20DX128__ // Teensy 3.0
// Loop counts handed to delayShort() for the high and low phase of a
// '0' bit (T0H/T0L) and a '1' bit (T1H/T1L), per supported F_CPU.
#if (F_CPU == 24000000)
#define DELAY_800_T0H 2
#define DELAY_800_T0L 4
#define DELAY_800_T1H 5
#define DELAY_800_T1L 1
#define DELAY_400_T0H 3
#define DELAY_400_T0L 10
#define DELAY_400_T1H 9
#define DELAY_400_T1L 5
#elif (F_CPU == 48000000)
#define DELAY_800_T0H 4
#define DELAY_800_T0L 9
#define DELAY_800_T1H 12
#define DELAY_800_T1L 1
#define DELAY_400_T0H 6
#define DELAY_400_T0L 20
#define DELAY_400_T1H 18
#define DELAY_400_T1L 11
#elif (F_CPU == 96000000)
#define DELAY_800_T0H 7
#define DELAY_800_T0L 17
#define DELAY_800_T1H 22
#define DELAY_800_T1L 2
#define DELAY_400_T0H 12
#define DELAY_400_T0L 40
#define DELAY_400_T1H 36
#define DELAY_400_T1L 22
#else
#error "CPU SPEED NOT SUPPORTED"
#endif
volatile uint8_t *set = portSetRegister(pin);
volatile uint8_t *clr = portClearRegister(pin);
#define SET_HI *set = 1;
#define SET_LO *clr = 1;
uint8_t *p = pixels,
*end = p + numBytes, pix, mask;
if((type & NEO_SPDMASK) == NEO_KHZ800) { // 800 KHz bitstream
while(p < end) {
pix = *p++;
for(mask = 0x80; mask; mask >>= 1) { // Most significant bit first
SET_HI
if(pix & mask) {
delayShort(DELAY_800_T1H);
SET_LO
delayShort(DELAY_800_T1L);
} else {
delayShort(DELAY_800_T0H);
SET_LO
delayShort(DELAY_800_T0L);
}
}
}
} else { // 400 kHz bitstream
while(p < end) {
pix = *p++;
for(mask = 0x80; mask; mask >>= 1) { // Most significant bit first
SET_HI
if(pix & mask) {
delayShort(DELAY_400_T1H);
SET_LO
delayShort(DELAY_400_T1L);
} else {
delayShort(DELAY_400_T0H);
SET_LO
delayShort(DELAY_400_T0L);
}
}
}
}
#else // Arduino Due
// Bit timing on this path is measured by polling the counter of timer
// TC1 channel 0, which is restarted at the rising edge of every bit.
#define SCALE VARIANT_MCK / 2UL / 1000000UL
#define INST (2UL * F_CPU / VARIANT_MCK)
#define TIME_800_L ((int)(0.40 * SCALE + 0.5) - (5 * INST))
#define TIME_800_H ((int)(0.80 * SCALE + 0.5) - (5 * INST))
#define PERIOD_800 ((int)(1.25 * SCALE + 0.5) - (5 * INST))
#define TIME_400_L ((int)(0.50 * SCALE + 0.5) - (5 * INST))
#define TIME_400_H ((int)(1.20 * SCALE + 0.5) - (5 * INST))
#define PERIOD_400 ((int)(2.50 * SCALE + 0.5) - (5 * INST))
int pinMask, timeLo, timeHi, period, t;
Pio *port;
volatile WoReg *portSet, *portClear, *timeValue, *timeReset;
uint8_t *p, *end, pix, mask;
pmc_set_writeprotect(false);
pmc_enable_periph_clk((uint32_t)TC3_IRQn);
TC_Configure(TC1, 0,
TC_CMR_WAVE | TC_CMR_WAVSEL_UP | TC_CMR_TCCLKS_TIMER_CLOCK1);
TC_Start(TC1, 0);
pinMask = g_APinDescription[pin].ulPin; // Don't 'optimize' these into
port = g_APinDescription[pin].pPort; // declarations above. Want to
portSet = &(port->PIO_SODR); // burn a few cycles after
portClear = &(port->PIO_CODR); // starting timer to minimize
timeValue = &(TC1->TC_CHANNEL[0].TC_CV); // the initial 'while'.
timeReset = &(TC1->TC_CHANNEL[0].TC_CCR);
p = pixels;
end = p + numBytes;
pix = *p++;
mask = 0x80;
if((type & NEO_SPDMASK) == NEO_KHZ800) { // 800 KHz bitstream
timeLo = TIME_800_L;
timeHi = TIME_800_H;
period = PERIOD_800;
} else { // 400 KHz bitstream
timeLo = TIME_400_L;
timeHi = TIME_400_H;
period = PERIOD_400;
}
for(t = timeLo;; t = timeLo) {
if(pix & mask) t = timeHi;
while(*timeValue < period); // Wait for the previous bit period to end
*portSet = pinMask;
*timeReset = TC_CCR_CLKEN | TC_CCR_SWTRG;
while(*timeValue < t); // Hold the pin high for this bit's high time
*portClear = pinMask;
if(!(mask >>= 1)) { // This 'inside-out' loop logic utilizes
if(p >= end) break; // idle time to minimize inter-byte delays.
pix = *p++;
mask = 0x80;
}
}
while(*timeValue < period); // Wait for last bit
TC_Stop(TC1, 0);
#endif // end Arduino Due
#endif // end Architecture select
interrupts();
endTime = micros(); // Save EOD time for latch on next call
}
// Set pixel color from separate R,G,B components.
//   n       - zero-based pixel index; out-of-range values are ignored.
//   r, g, b - 8-bit color components.
// When a brightness level is in effect the components are scaled before
// being stored, and they are written in the byte order selected by the
// color-order flag in 'type'. Nothing is written if the pixel buffer was
// never allocated.
void Adafruit_NeoPixel::setPixelColor(
 uint16_t n, uint8_t r, uint8_t g, uint8_t b) {
  // The constructor leaves 'pixels' NULL when malloc() fails; show() checks
  // for that, so do the same here rather than writing through NULL.
  if(!pixels || (n >= numLEDs)) return;

  if(brightness) { // See notes in setBrightness()
    r = (r * brightness) >> 8;
    g = (g * brightness) >> 8;
    b = (b * brightness) >> 8;
  }
  uint8_t *p = &pixels[n * 3];
  if((type & NEO_COLMASK) == NEO_GRB) { *p++ = g; *p++ = r; }
  else                                { *p++ = r; *p++ = g; }
  *p = b; // Blue is the third byte in both color orders
}
// Set pixel color from 'packed' 32-bit RGB color.
//   n - zero-based pixel index; out-of-range values are ignored.
//   c - color with red in bits 16-23, green in bits 8-15, blue in bits 0-7.
// When a brightness level is in effect the components are scaled before
// being stored, and they are written in the byte order selected by the
// color-order flag in 'type'. Nothing is written if the pixel buffer was
// never allocated.
void Adafruit_NeoPixel::setPixelColor(uint16_t n, uint32_t c) {
  // The constructor leaves 'pixels' NULL when malloc() fails; show() checks
  // for that, so do the same here rather than writing through NULL.
  if(!pixels || (n >= numLEDs)) return;

  uint8_t
    r = (uint8_t)(c >> 16),
    g = (uint8_t)(c >>  8),
    b = (uint8_t)c;
  if(brightness) { // See notes in setBrightness()
    r = (r * brightness) >> 8;
    g = (g * brightness) >> 8;
    b = (b * brightness) >> 8;
  }
  uint8_t *p = &pixels[n * 3];
  if((type & NEO_COLMASK) == NEO_GRB) { *p++ = g; *p++ = r; }
  else                                { *p++ = r; *p++ = g; }
  *p = b; // Blue is the third byte in both color orders
}
// Pack separate 8-bit R,G,B components into a single 32-bit value with red
// in bits 16-23, green in bits 8-15 and blue in bits 0-7. The packed layout
// is always RGB, whatever color order the LED strand itself uses.
uint32_t Adafruit_NeoPixel::Color(uint8_t r, uint8_t g, uint8_t b) {
  uint32_t packed = r;          // 0x000000RR
  packed = (packed << 8) | g;   // 0x0000RRGG
  packed = (packed << 8) | b;   // 0x00RRGGBB
  return packed;
}
// Query color from previously-set pixel (returns packed 32-bit RGB value)
uint32_t Adafruit_NeoPixel::getPixelColor(uint16_t n) {
if(n < numLEDs) {
uint16_t ofs = n * 3;
return (uint32_t)(pixels[ofs + 2]) |
(((type & NEO_COLMASK) == NEO_GRB) ?
((uint32_t)(pixels[ofs ]) << 8) |
((uint32_t)(pixels[ofs + 1]) << 16)
:
((uint32_t)(pixels[ofs ]) << 16) |
((uint32_t)(pixels[ofs + 1]) << 8) );
}
return 0; // Pixel # is out of bounds
}
// Return the number of pixels this object was constructed with.
uint16_t Adafruit_NeoPixel::numPixels(void) {
return numLEDs;
}
// Adjust output brightness; 0=darkest (off), 255=brightest. This does
// NOT immediately affect what's currently displayed on the LEDs. The
// next call to show() will refresh the LEDs at this level. However,
// this process is potentially "lossy," especially when increasing
// brightness. The tight timing in the WS2811/WS2812 code means there
// aren't enough free cycles to perform this scaling on the fly as data
// is issued. So we make a pass through the existing color data in RAM
// and scale it (subsequent graphics commands also work at this
// brightness level). If there's a significant step up in brightness,
// the limited number of steps (quantization) in the old data will be
// quite visible in the re-scaled version. For a non-destructive
// change, you'll need to re-render the full strip data. C'est la vie.
void Adafruit_NeoPixel::setBrightness(uint8_t b) {
// Stored brightness value is different than what's passed.
// This simplifies the actual scaling math later, allowing a fast
// 8x8-bit multiply and taking the MSB. 'brightness' is a uint8_t,
// adding 1 here may (intentionally) roll over...so 0 = max brightness
// (color values are interpreted literally; no scaling), 1 = min
// brightness (off), 255 = just below max brightness.
uint8_t newBrightness = b + 1;
if(newBrightness != brightness) { // Compare against prior value
// Brightness has changed -- re-scale existing data in RAM
uint8_t c,
*ptr = pixels,
oldBrightness = brightness - 1; // De-wrap old brightness value
uint16_t scale;
if(oldBrightness == 0) scale = 0; // Avoid /0
else if(b == 255) scale = 65535 / oldBrightness;
else scale = (((uint16_t)newBrightness << 8) - 1) / oldBrightness;
for(uint16_t i=0; i<numBytes; i++) {
c = *ptr;
*ptr++ = (c * scale) >> 8;
}
brightness = newBrightness;
}
}