/*-------------------------------------------------------------------------
  Arduino library to control a wide variety of WS2811- and WS2812-based RGB
  LED devices such as Adafruit FLORA RGB Smart Pixels and NeoPixel strips.
  Currently handles 400 and 800 KHz bitstreams on 8, 12 and 16 MHz ATmega
  MCUs, with LEDs wired for RGB or GRB color order.  8 MHz MCUs provide
  output on PORTB and PORTD, while 16 MHz chips can handle most output pins
  (possible exception with upper PORT registers on the Arduino Mega).

  Written by Phil Burgess / Paint Your Dragon for Adafruit Industries,
  contributions by PJRC and other members of the open source community.

  Adafruit invests time and resources providing this open source code,
  please support Adafruit and open-source hardware by purchasing products
  from Adafruit!

  -------------------------------------------------------------------------
  This file is part of the Adafruit NeoPixel library.

  NeoPixel is free software: you can redistribute it and/or modify
  it under the terms of the GNU Lesser General Public License as
  published by the Free Software Foundation, either version 3 of
  the License, or (at your option) any later version.

  NeoPixel is distributed in the hope that it will be useful,
  but WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  GNU Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public
  License along with NeoPixel.  If not, see
  <http://www.gnu.org/licenses/>.
  -------------------------------------------------------------------------*/
|
|
|
|
#include "Adafruit_NeoPixel.h"
|
|
|
|
// Construct a driver for a strip of 'n' pixels attached to Arduino pin 'p'.
// 't' carries the NEO_* flags that the rest of this file tests against
// NEO_SPDMASK (bitstream speed) and NEO_COLMASK (color order).
//
// A zero-filled buffer of 3 bytes per pixel is allocated with malloc(). If
// the allocation fails, 'pixels' stays NULL; show() checks for that and
// returns without transmitting.
Adafruit_NeoPixel::Adafruit_NeoPixel(uint16_t n, uint8_t p, uint8_t t) :
  numLEDs(n), numBytes(n * 3), pin(p), type(t), pixels(NULL)
#ifdef __AVR__
  ,port(portOutputRegister(digitalPinToPort(p))),
  pinMask(digitalPinToBitMask(p))
#endif
{
  // These two members are read before they are ever written elsewhere
  // (brightness in setPixelColor()/setBrightness(), endTime in show()), so
  // give them defined values even for stack- or heap-allocated instances.
  // They are assigned here rather than in the initializer list so that the
  // member declaration order in the header does not matter.
  brightness = 0; // 0 = no scaling; see notes in setBrightness()
  endTime    = 0; // No previous show() to wait on

  if((pixels = (uint8_t *)malloc(numBytes))) {
    memset(pixels, 0, numBytes); // All pixels start out 'off'
  }
}
|
|
|
|
#ifdef __MK20DX128__ // Teensy 3.0
// Busy-wait helper for the Teensy 3.0 bit-banging code in show(): counts
// 'num' down to zero in a two-instruction loop (SUBS + BNE). 'num' must be
// at least 1; passing 0 would wrap around and spin for 2^32 iterations.
// Forced inline so that no call/return overhead is added to the delay.
static inline void delayShort(uint32_t) __attribute__((always_inline, unused));
static inline void delayShort(uint32_t num) {
  asm volatile(
    "L_%=_delay:"    "\n\t" // %= yields a label unique to each inlined copy
    "subs %0, #1"    "\n\t" // num--, updating the condition flags
    "bne L_%=_delay" "\n"   // loop until num reaches zero
    : "+r" (num)            // read-write: the counter is modified in place
    :
    : "cc"                  // SUBS changes the flags; tell the compiler
  );
}
#endif // __MK20DX128__
|
|
|
|
// Prepare the data pin for use: switch it to output mode, then drive it
// low so the line idles low until show() is called.
void Adafruit_NeoPixel::begin() {
  pinMode(pin, OUTPUT);   // Must be an output before the level is set
  digitalWrite(pin, LOW); // Idle level of the data line
}
|
|
|
|
void Adafruit_NeoPixel::show(void) {
|
|
|
|
if(!pixels) return;
|
|
|
|
// Data latch = 50+ microsecond pause in the output stream. Rather than
|
|
// put a delay at the end of the function, the ending time is noted and
|
|
// the function will simply hold off (if needed) on issuing the
|
|
// subsequent round of data until the latch time has elapsed. This
|
|
// allows the mainline code to start generating the next frame of data
|
|
// rather than stalling for the latch.
|
|
while((micros() - endTime) < 50L);
|
|
// endTime is a private member (rather than global var) so that mutliple
|
|
// instances on different pins can be quickly issued in succession (each
|
|
// instance doesn't delay the next).
|
|
|
|
// In order to make this code runtime-configurable to work with any pin,
|
|
// SBI/CBI instructions are eschewed in favor of full PORT writes via the
|
|
// OUT or ST instructions. It relies on two facts: that peripheral
|
|
// functions (such as PWM) take precedence on output pins, so our PORT-
|
|
// wide writes won't interfere, and that interrupts are globally disabled
|
|
// while data is being issued to the LEDs, so no other code will be
|
|
// accessing the PORT. The code takes an initial 'snapshot' of the PORT
|
|
// state, computes 'pin high' and 'pin low' values, and writes these back
|
|
// to the PORT register as needed.
|
|
|
|
noInterrupts(); // Need 100% focus on instruction timing
|
|
|
|
#ifdef __AVR__
|
|
|
|
volatile uint16_t
|
|
i = numBytes; // Loop counter
|
|
volatile uint8_t
|
|
*ptr = pixels, // Pointer to next byte
|
|
b = *ptr++, // Current byte value
|
|
hi, // PORT w/output bit set high
|
|
lo; // PORT w/output bit set low
|
|
|
|
// Hand-tuned assembly code issues data to the LED drivers at a specific
|
|
// rate. There's separate code for different CPU speeds (8, 12, 16 MHz)
|
|
// for both the WS2811 (400 KHz) and WS2812 (800 KHz) drivers. The
|
|
// datastream timing for the LED drivers allows a little wiggle room each
|
|
// way (listed in the datasheets), so the conditions for compiling each
|
|
// case are set up for a range of frequencies rather than just the exact
|
|
// 8, 12 or 16 MHz values, permitting use with some close-but-not-spot-on
|
|
// devices (e.g. 16.5 MHz DigiSpark). The ranges were arrived at based
|
|
// on the datasheet figures and have not been extensively tested outside
|
|
// the canonical 8/12/16 MHz speeds; there's no guarantee these will work
|
|
// close to the extremes (or possibly they could be pushed further).
|
|
// Keep in mind only one CPU speed case actually gets compiled; the
|
|
// resulting program isn't as massive as it might look from source here.
|
|
|
|
// 8 MHz(ish) AVR ---------------------------------------------------------
|
|
#if (F_CPU >= 7400000UL) && (F_CPU <= 9500000UL)
|
|
|
|
if((type & NEO_SPDMASK) == NEO_KHZ800) { // 800 KHz bitstream
|
|
|
|
volatile uint8_t n1, n2 = 0; // First, next bits out
|
|
|
|
// Squeezing an 800 KHz stream out of an 8 MHz chip requires code
|
|
// specific to each PORT register. At present this is only written
|
|
// to work with pins on PORTD or PORTB, the most likely use case --
|
|
// this covers all the pins on the Adafruit Flora and the bulk of
|
|
// digital pins on the Arduino Pro 8 MHz (keep in mind, this code
|
|
// doesn't even get compiled for 16 MHz boards like the Uno, Mega,
|
|
// Leonardo, etc., so don't bother extending this out of hand).
|
|
// Additional PORTs could be added if you really need them, just
|
|
// duplicate the else and loop and change the PORT. Each add'l
|
|
// PORT will require about 150(ish) bytes of program space.
|
|
|
|
// 10 instruction clocks per bit: HHxxxxxLLL
|
|
// OUT instructions: ^ ^ ^ (T=0,2,7)
|
|
|
|
#ifdef PORTD // PORTD isn't present on ATtiny85, etc.
|
|
|
|
if(port == &PORTD) {
|
|
|
|
hi = PORTD | pinMask;
|
|
lo = PORTD & ~pinMask;
|
|
n1 = lo;
|
|
if(b & 0x80) n1 = hi;
|
|
|
|
// Dirty trick: RJMPs proceeding to the next instruction are used
|
|
// to delay two clock cycles in one instruction word (rather than
|
|
// using two NOPs). This was necessary in order to squeeze the
|
|
// loop down to exactly 64 words -- the maximum possible for a
|
|
// relative branch.
|
|
|
|
asm volatile(
|
|
"headD:\n\t" // Clk Pseudocode
|
|
// Bit 7:
|
|
"out %0, %1\n\t" // 1 PORT = hi
|
|
"mov %3, %4\n\t" // 1 n2 = lo
|
|
"out %0, %2\n\t" // 1 PORT = n1
|
|
"rjmp .+0\n\t" // 2 nop nop
|
|
"sbrc %5, 6\n\t" // 1-2 if(b & 0x40)
|
|
"mov %3, %1\n\t" // 0-1 n2 = hi
|
|
"out %0, %4\n\t" // 1 PORT = lo
|
|
"rjmp .+0\n\t" // 2 nop nop
|
|
// Bit 6:
|
|
"out %0, %1\n\t" // 1 PORT = hi
|
|
"mov %2, %4\n\t" // 1 n1 = lo
|
|
"out %0, %3\n\t" // 1 PORT = n2
|
|
"rjmp .+0\n\t" // 2 nop nop
|
|
"sbrc %5, 5\n\t" // 1-2 if(b & 0x20)
|
|
"mov %2, %1\n\t" // 0-1 n1 = hi
|
|
"out %0, %4\n\t" // 1 PORT = lo
|
|
"rjmp .+0\n\t" // 2 nop nop
|
|
// Bit 5:
|
|
"out %0, %1\n\t" // 1 PORT = hi
|
|
"mov %3, %4\n\t" // 1 n2 = lo
|
|
"out %0, %2\n\t" // 1 PORT = n1
|
|
"rjmp .+0\n\t" // 2 nop nop
|
|
"sbrc %5, 4\n\t" // 1-2 if(b & 0x10)
|
|
"mov %3, %1\n\t" // 0-1 n2 = hi
|
|
"out %0, %4\n\t" // 1 PORT = lo
|
|
"rjmp .+0\n\t" // 2 nop nop
|
|
// Bit 4:
|
|
"out %0, %1\n\t" // 1 PORT = hi
|
|
"mov %2, %4\n\t" // 1 n1 = lo
|
|
"out %0, %3\n\t" // 1 PORT = n2
|
|
"rjmp .+0\n\t" // 2 nop nop
|
|
"sbrc %5, 3\n\t" // 1-2 if(b & 0x08)
|
|
"mov %2, %1\n\t" // 0-1 n1 = hi
|
|
"out %0, %4\n\t" // 1 PORT = lo
|
|
"rjmp .+0\n\t" // 2 nop nop
|
|
// Bit 3:
|
|
"out %0, %1\n\t" // 1 PORT = hi
|
|
"mov %3, %4\n\t" // 1 n2 = lo
|
|
"out %0, %2\n\t" // 1 PORT = n1
|
|
"rjmp .+0\n\t" // 2 nop nop
|
|
"sbrc %5, 2\n\t" // 1-2 if(b & 0x04)
|
|
"mov %3, %1\n\t" // 0-1 n2 = hi
|
|
"out %0, %4\n\t" // 1 PORT = lo
|
|
"rjmp .+0\n\t" // 2 nop nop
|
|
// Bit 2:
|
|
"out %0, %1\n\t" // 1 PORT = hi
|
|
"mov %2, %4\n\t" // 1 n1 = lo
|
|
"out %0, %3\n\t" // 1 PORT = n2
|
|
"rjmp .+0\n\t" // 2 nop nop
|
|
"sbrc %5, 1\n\t" // 1-2 if(b & 0x02)
|
|
"mov %2, %1\n\t" // 0-1 n1 = hi
|
|
"out %0, %4\n\t" // 1 PORT = lo
|
|
"rjmp .+0\n\t" // 2 nop nop
|
|
// Bit 1:
|
|
"out %0, %1\n\t" // 1 PORT = hi
|
|
"mov %3, %4\n\t" // 1 n2 = lo
|
|
"out %0, %2\n\t" // 1 PORT = n1
|
|
"rjmp .+0\n\t" // 2 nop nop
|
|
"sbrc %5, 0\n\t" // 1-2 if(b & 0x01)
|
|
"mov %3, %1\n\t" // 0-1 n2 = hi
|
|
"out %0, %4\n\t" // 1 PORT = lo
|
|
"sbiw %6, 1\n\t" // 2 i-- (dec. but don't act on zero flag yet)
|
|
// Bit 0:
|
|
"out %0, %1\n\t" // 1 PORT = hi
|
|
"mov %2, %4\n\t" // 1 n1 = lo
|
|
"out %0, %3\n\t" // 1 PORT = n2
|
|
"ld %5, %a7+\n\t" // 2 b = *ptr++
|
|
"sbrc %5, 7\n\t" // 1-2 if(b & 0x80)
|
|
"mov %2, %1\n\t" // 0-1 n1 = hi
|
|
"out %0, %4\n\t" // 1 PORT = lo
|
|
"brne headD\n" // 2 while(i) (zero flag determined above)
|
|
::
|
|
"I" (_SFR_IO_ADDR(PORTD)), // %0
|
|
"r" (hi), // %1
|
|
"r" (n1), // %2
|
|
"r" (n2), // %3
|
|
"r" (lo), // %4
|
|
"r" (b), // %5
|
|
"w" (i), // %6
|
|
"e" (ptr) // %a7
|
|
); // end asm
|
|
|
|
} else if(port == &PORTB) {
|
|
|
|
#endif // PORTD
|
|
|
|
// Same as above, just switched to PORTB and stripped of comments.
|
|
hi = PORTB | pinMask;
|
|
lo = PORTB & ~pinMask;
|
|
n1 = lo;
|
|
if(b & 0x80) n1 = hi;
|
|
|
|
asm volatile(
|
|
"headB:\n\t"
|
|
"out %0, %1\n\t"
|
|
"mov %3, %4\n\t"
|
|
"out %0, %2\n\t"
|
|
"rjmp .+0\n\t"
|
|
"sbrc %5, 6\n\t"
|
|
"mov %3, %1\n\t"
|
|
"out %0, %4\n\t"
|
|
"rjmp .+0\n\t"
|
|
"out %0, %1\n\t"
|
|
"mov %2, %4\n\t"
|
|
"out %0, %3\n\t"
|
|
"rjmp .+0\n\t"
|
|
"sbrc %5, 5\n\t"
|
|
"mov %2, %1\n\t"
|
|
"out %0, %4\n\t"
|
|
"rjmp .+0\n\t"
|
|
"out %0, %1\n\t"
|
|
"mov %3, %4\n\t"
|
|
"out %0, %2\n\t"
|
|
"rjmp .+0\n\t"
|
|
"sbrc %5, 4\n\t"
|
|
"mov %3, %1\n\t"
|
|
"out %0, %4\n\t"
|
|
"rjmp .+0\n\t"
|
|
"out %0, %1\n\t"
|
|
"mov %2, %4\n\t"
|
|
"out %0, %3\n\t"
|
|
"rjmp .+0\n\t"
|
|
"sbrc %5, 3\n\t"
|
|
"mov %2, %1\n\t"
|
|
"out %0, %4\n\t"
|
|
"rjmp .+0\n\t"
|
|
"out %0, %1\n\t"
|
|
"mov %3, %4\n\t"
|
|
"out %0, %2\n\t"
|
|
"rjmp .+0\n\t"
|
|
"sbrc %5, 2\n\t"
|
|
"mov %3, %1\n\t"
|
|
"out %0, %4\n\t"
|
|
"rjmp .+0\n\t"
|
|
"out %0, %1\n\t"
|
|
"mov %2, %4\n\t"
|
|
"out %0, %3\n\t"
|
|
"rjmp .+0\n\t"
|
|
"sbrc %5, 1\n\t"
|
|
"mov %2, %1\n\t"
|
|
"out %0, %4\n\t"
|
|
"rjmp .+0\n\t"
|
|
"out %0, %1\n\t"
|
|
"mov %3, %4\n\t"
|
|
"out %0, %2\n\t"
|
|
"rjmp .+0\n\t"
|
|
"sbrc %5, 0\n\t"
|
|
"mov %3, %1\n\t"
|
|
"out %0, %4\n\t"
|
|
"sbiw %6, 1\n\t"
|
|
"out %0, %1\n\t"
|
|
"mov %2, %4\n\t"
|
|
"out %0, %3\n\t"
|
|
"ld %5, %a7+\n\t"
|
|
"sbrc %5, 7\n\t"
|
|
"mov %2, %1\n\t"
|
|
"out %0, %4\n\t"
|
|
"brne headB\n" :: "I" (_SFR_IO_ADDR(PORTB)), "r" (hi),
|
|
"r" (n1), "r" (n2), "r" (lo), "r" (b), "w" (i), "e" (ptr));
|
|
|
|
#ifdef PORTD
|
|
} // endif PORTB
|
|
#endif
|
|
|
|
} else { // end 800 KHz, do 400 KHz
|
|
|
|
// Timing is more relaxed; unrolling the inner loop for each bit is
|
|
// not necessary. Still using the peculiar RJMPs as 2X NOPs, not out
|
|
// of need but just to trim the code size down a little.
|
|
// This 400-KHz-datastream-on-8-MHz-CPU code is not quite identical
|
|
// to the 800-on-16 code later -- the hi/lo timing between WS2811 and
|
|
// WS2812 is not simply a 2:1 scale!
|
|
|
|
// 20 inst. clocks per bit: HHHHxxxxxxLLLLLLLLLL
|
|
// ST instructions: ^ ^ ^ (T=0,4,10)
|
|
|
|
volatile uint8_t next, bit;
|
|
|
|
hi = *port | pinMask;
|
|
lo = *port & ~pinMask;
|
|
next = lo;
|
|
bit = 8;
|
|
|
|
asm volatile(
|
|
"head20:\n\t" // Clk Pseudocode (T = 0)
|
|
"st %a0, %1\n\t" // 2 PORT = hi (T = 2)
|
|
"sbrc %2, 7\n\t" // 1-2 if(b & 128)
|
|
"mov %4, %1\n\t" // 0-1 next = hi (T = 4)
|
|
"st %a0, %4\n\t" // 2 PORT = next (T = 6)
|
|
"mov %4, %5\n\t" // 1 next = lo (T = 7)
|
|
"dec %3\n\t" // 1 bit-- (T = 8)
|
|
"breq nextbyte20\n\t" // 1-2 if(bit == 0)
|
|
"rol %2\n\t" // 1 b <<= 1 (T = 10)
|
|
"st %a0, %5\n\t" // 2 PORT = lo (T = 12)
|
|
"rjmp .+0\n\t" // 2 nop nop (T = 14)
|
|
"rjmp .+0\n\t" // 2 nop nop (T = 16)
|
|
"rjmp .+0\n\t" // 2 nop nop (T = 18)
|
|
"rjmp head20\n\t" // 2 -> head20 (next bit out)
|
|
"nextbyte20:\n\t" // (T = 10)
|
|
"st %a0, %5\n\t" // 2 PORT = lo (T = 12)
|
|
"nop\n\t" // 1 nop (T = 13)
|
|
"ldi %3, 8\n\t" // 1 bit = 8 (T = 14)
|
|
"ld %2, %a6+\n\t" // 2 b = *ptr++ (T = 16)
|
|
"sbiw %7, 1\n\t" // 2 i-- (T = 18)
|
|
"brne head20\n\t" // 2 if(i != 0) -> head20 (next byte)
|
|
::
|
|
"e" (port), // %a0
|
|
"r" (hi), // %1
|
|
"r" (b), // %2
|
|
"r" (bit), // %3
|
|
"r" (next), // %4
|
|
"r" (lo), // %5
|
|
"e" (ptr), // %a6
|
|
"w" (i) // %7
|
|
); // end asm
|
|
}
|
|
|
|
// 12 MHz(ish) AVR --------------------------------------------------------
|
|
#elif (F_CPU >= 11100000UL) && (F_CPU <= 14300000UL)
|
|
|
|
if((type & NEO_SPDMASK) == NEO_KHZ800) { // 800 KHz bitstream
|
|
|
|
// In the 12 MHz case, an optimized 800 KHz datastream (no dead time
|
|
// between bytes) requires a PORT-specific loop similar to the 8 MHz
|
|
// code (but a little more relaxed in this case).
|
|
|
|
// 15 instruction clocks per bit: HHHHxxxxxxLLLLL
|
|
// OUT instructions: ^ ^ ^ (T=0,4,10)
|
|
|
|
volatile uint8_t next;
|
|
|
|
#ifdef PORTD
|
|
|
|
if(port == &PORTD) {
|
|
|
|
hi = PORTD | pinMask;
|
|
lo = PORTD & ~pinMask;
|
|
next = lo;
|
|
if(b & 0x80) next = hi;
|
|
|
|
// Don't "optimize" the OUT calls into the bitTime subroutine;
|
|
// we're exploiting the RCALL and RET as 3- and 4-cycle NOPs!
|
|
asm volatile(
|
|
"headD:\n\t" // (T = 0)
|
|
"out %0, %1\n\t" // (T = 1)
|
|
"rcall bitTimeD\n\t" // Bit 7 (T = 15)
|
|
"out %0, %1\n\t"
|
|
"rcall bitTimeD\n\t" // Bit 6
|
|
"out %0, %1\n\t"
|
|
"rcall bitTimeD\n\t" // Bit 5
|
|
"out %0, %1\n\t"
|
|
"rcall bitTimeD\n\t" // Bit 4
|
|
"out %0, %1\n\t"
|
|
"rcall bitTimeD\n\t" // Bit 3
|
|
"out %0, %1\n\t"
|
|
"rcall bitTimeD\n\t" // Bit 2
|
|
"out %0, %1\n\t"
|
|
"rcall bitTimeD\n\t" // Bit 1
|
|
// Bit 0:
|
|
"out %0, %1\n\t" // 1 PORT = hi (T = 1)
|
|
"rjmp .+0\n\t" // 2 nop nop (T = 3)
|
|
"ld %4, %a5+\n\t" // 2 b = *ptr++ (T = 5)
|
|
"out %0, %2\n\t" // 1 PORT = next (T = 6)
|
|
"mov %2, %3\n\t" // 1 next = lo (T = 7)
|
|
"sbrc %4, 7\n\t" // 1-2 if(b & 0x80) (T = 8)
|
|
"mov %2, %1\n\t" // 0-1 next = hi (T = 9)
|
|
"nop\n\t" // 1 (T = 10)
|
|
"out %0, %3\n\t" // 1 PORT = lo (T = 11)
|
|
"sbiw %6, 1\n\t" // 2 i-- (T = 13)
|
|
"brne headD\n\t" // 2 if(i != 0) -> headD (next byte)
|
|
"rjmp doneD\n\t"
|
|
"bitTimeD:\n\t" // nop nop nop (T = 4)
|
|
"out %0, %2\n\t" // 1 PORT = next (T = 5)
|
|
"mov %2, %3\n\t" // 1 next = lo (T = 6)
|
|
"rol %4\n\t" // 1 b <<= 1 (T = 7)
|
|
"sbrc %4, 7\n\t" // 1-2 if(b & 0x80) (T = 8)
|
|
"mov %2, %1\n\t" // 0-1 next = hi (T = 9)
|
|
"nop\n\t" // 1 (T = 10)
|
|
"out %0, %3\n\t" // 1 PORT = lo (T = 11)
|
|
"ret\n\t" // 4 nop nop nop nop (T = 15)
|
|
"doneD:\n\t"
|
|
::
|
|
"I" (_SFR_IO_ADDR(PORTD)), // %0
|
|
"r" (hi), // %1
|
|
"r" (next), // %2
|
|
"r" (lo), // %3
|
|
"r" (b), // %4
|
|
"e" (ptr), // %a5
|
|
"w" (i) // %6
|
|
); // end asm
|
|
|
|
} else if(port == &PORTB) {
|
|
|
|
#endif // PORTD
|
|
|
|
hi = PORTB | pinMask;
|
|
lo = PORTB & ~pinMask;
|
|
next = lo;
|
|
if(b & 0x80) next = hi;
|
|
|
|
// Same as above, just set for PORTB & stripped of comments
|
|
asm volatile(
|
|
"headB:\n\t"
|
|
"out %0, %1\n\t"
|
|
"rcall bitTimeB\n\t"
|
|
"out %0, %1\n\t"
|
|
"rcall bitTimeB\n\t"
|
|
"out %0, %1\n\t"
|
|
"rcall bitTimeB\n\t"
|
|
"out %0, %1\n\t"
|
|
"rcall bitTimeB\n\t"
|
|
"out %0, %1\n\t"
|
|
"rcall bitTimeB\n\t"
|
|
"out %0, %1\n\t"
|
|
"rcall bitTimeB\n\t"
|
|
"out %0, %1\n\t"
|
|
"rcall bitTimeB\n\t"
|
|
"out %0, %1\n\t"
|
|
"rjmp .+0\n\t"
|
|
"ld %4, %a5+\n\t"
|
|
"out %0, %2\n\t"
|
|
"mov %2, %3\n\t"
|
|
"sbrc %4, 7\n\t"
|
|
"mov %2, %1\n\t"
|
|
"nop\n\t"
|
|
"out %0, %3\n\t"
|
|
"sbiw %6, 1\n\t"
|
|
"brne headB\n\t"
|
|
"rjmp doneB\n\t"
|
|
"bitTimeB:\n\t"
|
|
"out %0, %2\n\t"
|
|
"mov %2, %3\n\t"
|
|
"rol %4\n\t"
|
|
"sbrc %4, 7\n\t"
|
|
"mov %2, %1\n\t"
|
|
"nop\n\t"
|
|
"out %0, %3\n\t"
|
|
"ret\n\t"
|
|
"doneB:\n\t" :: "I" (_SFR_IO_ADDR(PORTB)), "r" (hi), "r" (next),
|
|
"r" (lo), "r" (b), "e" (ptr), "w" (i));
|
|
|
|
#ifdef PORTD
|
|
}
|
|
#endif
|
|
|
|
} else { // 400 KHz
|
|
|
|
// 30 instruction clocks per bit: HHHHHHxxxxxxxxxLLLLLLLLLLLLLLL
|
|
// ST instructions: ^ ^ ^ (T=0,6,15)
|
|
|
|
volatile uint8_t next, bit;
|
|
|
|
hi = *port | pinMask;
|
|
lo = *port & ~pinMask;
|
|
next = lo;
|
|
bit = 8;
|
|
|
|
asm volatile(
|
|
"head30:\n\t" // Clk Pseudocode (T = 0)
|
|
"st %a0, %1\n\t" // 2 PORT = hi (T = 2)
|
|
"sbrc %2, 7\n\t" // 1-2 if(b & 128)
|
|
"mov %4, %1\n\t" // 0-1 next = hi (T = 4)
|
|
"rjmp .+0\n\t" // 2 nop nop (T = 6)
|
|
"st %a0, %4\n\t" // 2 PORT = next (T = 8)
|
|
"rjmp .+0\n\t" // 2 nop nop (T = 10)
|
|
"rjmp .+0\n\t" // 2 nop nop (T = 12)
|
|
"rjmp .+0\n\t" // 2 nop nop (T = 14)
|
|
"nop\n\t" // 1 nop (T = 15)
|
|
"st %a0, %5\n\t" // 2 PORT = lo (T = 17)
|
|
"rjmp .+0\n\t" // 2 nop nop (T = 19)
|
|
"dec %3\n\t" // 1 bit-- (T = 20)
|
|
"breq nextbyte30\n\t" // 1-2 if(bit == 0)
|
|
"rol %2\n\t" // 1 b <<= 1 (T = 22)
|
|
"rjmp .+0\n\t" // 2 nop nop (T = 24)
|
|
"rjmp .+0\n\t" // 2 nop nop (T = 26)
|
|
"rjmp .+0\n\t" // 2 nop nop (T = 28)
|
|
"rjmp head30\n\t" // 2 -> head30 (next bit out)
|
|
"nextbyte30:\n\t" // (T = 22)
|
|
"nop\n\t" // 1 nop (T = 23)
|
|
"ldi %3, 8\n\t" // 1 bit = 8 (T = 24)
|
|
"ld %2, %a6+\n\t" // 2 b = *ptr++ (T = 26)
|
|
"sbiw %7, 1\n\t" // 2 i-- (T = 28)
|
|
"brne head30\n\t" // 1-2 if(i != 0) -> head30 (next byte)
|
|
::
|
|
"e" (port), // %a0
|
|
"r" (hi), // %1
|
|
"r" (b), // %2
|
|
"r" (bit), // %3
|
|
"r" (next), // %4
|
|
"r" (lo), // %5
|
|
"e" (ptr), // %a6
|
|
"w" (i) // %7
|
|
); // end asm
|
|
}
|
|
|
|
// 16 MHz(ish) AVR --------------------------------------------------------
|
|
#elif (F_CPU >= 15400000UL) && (F_CPU <= 19000000L)
|
|
|
|
if((type & NEO_SPDMASK) == NEO_KHZ800) { // 800 KHz bitstream
|
|
|
|
// WS2811 and WS2812 have different hi/lo duty cycles; this is
|
|
// similar but NOT an exact copy of the prior 400-on-8 code.
|
|
|
|
// 20 inst. clocks per bit: HHHHHxxxxxxxxLLLLLLL
|
|
// ST instructions: ^ ^ ^ (T=0,5,13)
|
|
|
|
volatile uint8_t next, bit;
|
|
|
|
hi = *port | pinMask;
|
|
lo = *port & ~pinMask;
|
|
next = lo;
|
|
bit = 8;
|
|
|
|
asm volatile(
|
|
"head20:\n\t" // Clk Pseudocode (T = 0)
|
|
"st %a0, %1\n\t" // 2 PORT = hi (T = 2)
|
|
"sbrc %2, 7\n\t" // 1-2 if(b & 128)
|
|
"mov %4, %1\n\t" // 0-1 next = hi (T = 4)
|
|
"dec %3\n\t" // 1 bit-- (T = 5)
|
|
"st %a0, %4\n\t" // 2 PORT = next (T = 7) ST and MOV don't
|
|
"mov %4, %5\n\t" // 1 next = lo (T = 8) change Z flag,
|
|
"breq nextbyte20\n\t" // 1-2 if(bit == 0) <-- so this is OK.
|
|
"rol %2\n\t" // 1 b <<= 1 (T = 10)
|
|
"rjmp .+0\n\t" // 2 nop nop (T = 12)
|
|
"nop\n\t" // 1 nop (T = 13)
|
|
"st %a0, %5\n\t" // 2 PORT = lo (T = 15)
|
|
"nop\n\t" // 1 nop (T = 16)
|
|
"rjmp .+0\n\t" // 2 nop nop (T = 18)
|
|
"rjmp head20\n\t" // 2 -> head20 (next bit out)
|
|
"nextbyte20:\n\t" // (T = 10)
|
|
"ldi %3, 8\n\t" // 1 bit = 8 (T = 11)
|
|
"ld %2, %a6+\n\t" // 2 b = *ptr++ (T = 13)
|
|
"st %a0, %5\n\t" // 2 PORT = lo (T = 15)
|
|
"nop\n\t" // 1 nop (T = 16)
|
|
"sbiw %7, 1\n\t" // 2 i-- (T = 18)
|
|
"brne head20\n\t" // 2 if(i != 0) -> head20 (next byte)
|
|
::
|
|
"e" (port), // %a0
|
|
"r" (hi), // %1
|
|
"r" (b), // %2
|
|
"r" (bit), // %3
|
|
"r" (next), // %4
|
|
"r" (lo), // %5
|
|
"e" (ptr), // %a6
|
|
"w" (i) // %7
|
|
); // end asm
|
|
|
|
} else { // 400 KHz
|
|
|
|
// The 400 KHz clock on 16 MHz MCU is the most 'relaxed' version.
|
|
|
|
// 40 inst. clocks per bit: HHHHHHHHxxxxxxxxxxxxLLLLLLLLLLLLLLLLLLLL
|
|
// ST instructions: ^ ^ ^ (T=0,8,20)
|
|
|
|
volatile uint8_t next, bit;
|
|
|
|
hi = *port | pinMask;
|
|
lo = *port & ~pinMask;
|
|
next = lo;
|
|
bit = 8;
|
|
|
|
asm volatile(
|
|
"head40:\n\t" // Clk Pseudocode (T = 0)
|
|
"st %a0, %1\n\t" // 2 PORT = hi (T = 2)
|
|
"sbrc %2, 7\n\t" // 1-2 if(b & 128)
|
|
"mov %4, %1\n\t" // 0-1 next = hi (T = 4)
|
|
"rjmp .+0\n\t" // 2 nop nop (T = 6)
|
|
"rjmp .+0\n\t" // 2 nop nop (T = 8)
|
|
"st %a0, %4\n\t" // 2 PORT = next (T = 10)
|
|
"rjmp .+0\n\t" // 2 nop nop (T = 12)
|
|
"rjmp .+0\n\t" // 2 nop nop (T = 14)
|
|
"rjmp .+0\n\t" // 2 nop nop (T = 16)
|
|
"rjmp .+0\n\t" // 2 nop nop (T = 18)
|
|
"rjmp .+0\n\t" // 2 nop nop (T = 20)
|
|
"st %a0, %5\n\t" // 2 PORT = lo (T = 22)
|
|
"nop\n\t" // 1 nop (T = 23)
|
|
"mov %4, %5\n\t" // 1 next = lo (T = 24)
|
|
"dec %3\n\t" // 1 bit-- (T = 25)
|
|
"breq nextbyte40\n\t" // 1-2 if(bit == 0)
|
|
"rol %2\n\t" // 1 b <<= 1 (T = 27)
|
|
"nop\n\t" // 1 nop (T = 28)
|
|
"rjmp .+0\n\t" // 2 nop nop (T = 30)
|
|
"rjmp .+0\n\t" // 2 nop nop (T = 32)
|
|
"rjmp .+0\n\t" // 2 nop nop (T = 34)
|
|
"rjmp .+0\n\t" // 2 nop nop (T = 36)
|
|
"rjmp .+0\n\t" // 2 nop nop (T = 38)
|
|
"rjmp head40\n\t" // 2 -> head40 (next bit out)
|
|
"nextbyte40:\n\t" // (T = 27)
|
|
"ldi %3, 8\n\t" // 1 bit = 8 (T = 28)
|
|
"ld %2, %a6+\n\t" // 2 b = *ptr++ (T = 30)
|
|
"rjmp .+0\n\t" // 2 nop nop (T = 32)
|
|
"st %a0, %5\n\t" // 2 PORT = lo (T = 34)
|
|
"rjmp .+0\n\t" // 2 nop nop (T = 36)
|
|
"sbiw %7, 1\n\t" // 2 i-- (T = 38)
|
|
"brne head40\n\t" // 1-2 if(i != 0) -> head40 (next byte)
|
|
::
|
|
"e" (port), // %a0
|
|
"r" (hi), // %1
|
|
"r" (b), // %2
|
|
"r" (bit), // %3
|
|
"r" (next), // %4
|
|
"r" (lo), // %5
|
|
"e" (ptr), // %a6
|
|
"w" (i) // %7
|
|
); // end asm
|
|
|
|
}
|
|
|
|
#else
|
|
#error "CPU SPEED NOT SUPPORTED"
|
|
#endif
|
|
|
|
#elif defined(__arm__)
|
|
|
|
// Paul Stoffregen: "This implementation may not be quite perfect, but
|
|
// it seems to work reasonably well with an actual 20 LED WS2811 strip.
|
|
// The timing at 48 MHz is off a bit, perhaps due to flash cache misses?
|
|
// Ideally this code should execute from RAM to eliminate slight timing
|
|
// differences between flash caches hits and misses. But it seems to
|
|
// [run] quite well. More testing is needed with longer strips."
|
|
|
|
/* If timing can be stabilized, something like this should work:
|
|
#define DELAY_800_T0H (0.40 * F_CPU / 1000000L / DCYC + 0.5)
|
|
#define DELAY_800_T0L (0.85 * F_CPU / 1000000L / DCYC + 0.5)
|
|
#define DELAY_800_T1H (0.80 * F_CPU / 1000000L / DCYC + 0.5)
|
|
#define DELAY_800_T1L (0.45 * F_CPU / 1000000L / DCYC + 0.5)
|
|
#define DELAY_400_T0H (0.50 * F_CPU / 1000000L / DCYC + 0.5)
|
|
#define DELAY_400_T0L (2.00 * F_CPU / 1000000L / DCYC + 0.5)
|
|
#define DELAY_400_T1H (1.20 * F_CPU / 1000000L / DCYC + 0.5)
|
|
#define DELAY_400_T1L (1.30 * F_CPU / 1000000L / DCYC + 0.5)
|
|
But in the meantime, a fixed set of tables is used:
|
|
*/
|
|
|
|
#ifdef __MK20DX128__ // Teensy 3.0
|
|
|
|
#if (F_CPU == 24000000)
|
|
#define DELAY_800_T0H 2
|
|
#define DELAY_800_T0L 4
|
|
#define DELAY_800_T1H 5
|
|
#define DELAY_800_T1L 1
|
|
#define DELAY_400_T0H 3
|
|
#define DELAY_400_T0L 10
|
|
#define DELAY_400_T1H 9
|
|
#define DELAY_400_T1L 5
|
|
#elif (F_CPU == 48000000)
|
|
#define DELAY_800_T0H 4
|
|
#define DELAY_800_T0L 9
|
|
#define DELAY_800_T1H 12
|
|
#define DELAY_800_T1L 1
|
|
#define DELAY_400_T0H 6
|
|
#define DELAY_400_T0L 20
|
|
#define DELAY_400_T1H 18
|
|
#define DELAY_400_T1L 11
|
|
#elif (F_CPU == 96000000)
|
|
#define DELAY_800_T0H 7
|
|
#define DELAY_800_T0L 17
|
|
#define DELAY_800_T1H 22
|
|
#define DELAY_800_T1L 2
|
|
#define DELAY_400_T0H 12
|
|
#define DELAY_400_T0L 40
|
|
#define DELAY_400_T1H 36
|
|
#define DELAY_400_T1L 22
|
|
#else
|
|
#error "CPU SPEED NOT SUPPORTED"
|
|
#endif
|
|
|
|
volatile uint8_t *set = portSetRegister(pin);
|
|
volatile uint8_t *clr = portClearRegister(pin);
|
|
#define SET_HI *set = 1;
|
|
#define SET_LO *clr = 1;
|
|
uint8_t *p = pixels,
|
|
*end = p + numBytes, pix, mask;
|
|
|
|
if((type & NEO_SPDMASK) == NEO_KHZ800) { // 800 KHz bitstream
|
|
while(p < end) {
|
|
pix = *p++;
|
|
for(mask = 0x80; mask; mask >>= 1) {
|
|
SET_HI
|
|
if(pix & mask) {
|
|
delayShort(DELAY_800_T1H);
|
|
SET_LO
|
|
delayShort(DELAY_800_T1L);
|
|
} else {
|
|
delayShort(DELAY_800_T0H);
|
|
SET_LO
|
|
delayShort(DELAY_800_T0L);
|
|
}
|
|
}
|
|
}
|
|
} else { // 400 kHz bitstream
|
|
while(p < end) {
|
|
pix = *p++;
|
|
for(mask = 0x80; mask; mask >>= 1) {
|
|
SET_HI
|
|
if(pix & mask) {
|
|
delayShort(DELAY_400_T1H);
|
|
SET_LO
|
|
delayShort(DELAY_400_T1L);
|
|
} else {
|
|
delayShort(DELAY_400_T0H);
|
|
SET_LO
|
|
delayShort(DELAY_400_T0L);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
#else // Arduino Due
|
|
|
|
#define SCALE VARIANT_MCK / 2UL / 1000000UL
|
|
#define INST (2UL * F_CPU / VARIANT_MCK)
|
|
#define TIME_800_L ((int)(0.40 * SCALE + 0.5) - (5 * INST))
|
|
#define TIME_800_H ((int)(0.80 * SCALE + 0.5) - (5 * INST))
|
|
#define PERIOD_800 ((int)(1.25 * SCALE + 0.5) - (5 * INST))
|
|
#define TIME_400_L ((int)(0.50 * SCALE + 0.5) - (5 * INST))
|
|
#define TIME_400_H ((int)(1.20 * SCALE + 0.5) - (5 * INST))
|
|
#define PERIOD_400 ((int)(2.50 * SCALE + 0.5) - (5 * INST))
|
|
|
|
int pinMask, timeLo, timeHi, period, t;
|
|
Pio *port;
|
|
volatile WoReg *portSet, *portClear, *timeValue, *timeReset;
|
|
uint8_t *p, *end, pix, mask;
|
|
|
|
pmc_set_writeprotect(false);
|
|
pmc_enable_periph_clk((uint32_t)TC3_IRQn);
|
|
TC_Configure(TC1, 0,
|
|
TC_CMR_WAVE | TC_CMR_WAVSEL_UP | TC_CMR_TCCLKS_TIMER_CLOCK1);
|
|
TC_Start(TC1, 0);
|
|
|
|
pinMask = g_APinDescription[pin].ulPin; // Don't 'optimize' these into
|
|
port = g_APinDescription[pin].pPort; // declarations above. Want to
|
|
portSet = &(port->PIO_SODR); // burn a few cycles after
|
|
portClear = &(port->PIO_CODR); // starting timer to minimize
|
|
timeValue = &(TC1->TC_CHANNEL[0].TC_CV); // the initial 'while'.
|
|
timeReset = &(TC1->TC_CHANNEL[0].TC_CCR);
|
|
p = pixels;
|
|
end = p + numBytes;
|
|
pix = *p++;
|
|
mask = 0x80;
|
|
|
|
if((type & NEO_SPDMASK) == NEO_KHZ800) { // 800 KHz bitstream
|
|
timeLo = TIME_800_L;
|
|
timeHi = TIME_800_H;
|
|
period = PERIOD_800;
|
|
} else { // 400 KHz bitstream
|
|
timeLo = TIME_400_L;
|
|
timeHi = TIME_400_H;
|
|
period = PERIOD_400;
|
|
}
|
|
|
|
for(t = timeLo;; t = timeLo) {
|
|
if(pix & mask) t = timeHi;
|
|
while(*timeValue < period);
|
|
*portSet = pinMask;
|
|
*timeReset = TC_CCR_CLKEN | TC_CCR_SWTRG;
|
|
while(*timeValue < t);
|
|
*portClear = pinMask;
|
|
if(!(mask >>= 1)) { // This 'inside-out' loop logic utilizes
|
|
if(p >= end) break; // idle time to minimize inter-byte delays.
|
|
pix = *p++;
|
|
mask = 0x80;
|
|
}
|
|
}
|
|
while(*timeValue < period); // Wait for last bit
|
|
TC_Stop(TC1, 0);
|
|
|
|
#endif // end Arduino Due
|
|
|
|
#endif // end Architecture select
|
|
|
|
interrupts();
|
|
endTime = micros(); // Save EOD time for latch on next call
|
|
}
|
|
|
|
// Set pixel color from separate R,G,B components:
|
|
void Adafruit_NeoPixel::setPixelColor(
|
|
uint16_t n, uint8_t r, uint8_t g, uint8_t b) {
|
|
if(n < numLEDs) {
|
|
if(brightness) { // See notes in setBrightness()
|
|
r = (r * brightness) >> 8;
|
|
g = (g * brightness) >> 8;
|
|
b = (b * brightness) >> 8;
|
|
}
|
|
uint8_t *p = &pixels[n * 3];
|
|
if((type & NEO_COLMASK) == NEO_GRB) { *p++ = g; *p++ = r; }
|
|
else { *p++ = r; *p++ = g; }
|
|
*p = b;
|
|
}
|
|
}
|
|
|
|
// Set pixel color from 'packed' 32-bit RGB color:
|
|
void Adafruit_NeoPixel::setPixelColor(uint16_t n, uint32_t c) {
|
|
if(n < numLEDs) {
|
|
uint8_t
|
|
r = (uint8_t)(c >> 16),
|
|
g = (uint8_t)(c >> 8),
|
|
b = (uint8_t)c;
|
|
if(brightness) { // See notes in setBrightness()
|
|
r = (r * brightness) >> 8;
|
|
g = (g * brightness) >> 8;
|
|
b = (b * brightness) >> 8;
|
|
}
|
|
uint8_t *p = &pixels[n * 3];
|
|
if((type & NEO_COLMASK) == NEO_GRB) { *p++ = g; *p++ = r; }
|
|
else { *p++ = r; *p++ = g; }
|
|
*p = b;
|
|
}
|
|
}
|
|
|
|
// Convert separate R,G,B into packed 32-bit RGB color.
|
|
// Packed format is always RGB, regardless of LED strand color order.
|
|
uint32_t Adafruit_NeoPixel::Color(uint8_t r, uint8_t g, uint8_t b) {
|
|
return ((uint32_t)r << 16) | ((uint32_t)g << 8) | b;
|
|
}
|
|
|
|
// Query color from previously-set pixel (returns packed 32-bit RGB value)
|
|
uint32_t Adafruit_NeoPixel::getPixelColor(uint16_t n) {
|
|
|
|
if(n < numLEDs) {
|
|
uint16_t ofs = n * 3;
|
|
return (uint32_t)(pixels[ofs + 2]) |
|
|
(((type & NEO_COLMASK) == NEO_GRB) ?
|
|
((uint32_t)(pixels[ofs ]) << 8) |
|
|
((uint32_t)(pixels[ofs + 1]) << 16)
|
|
:
|
|
((uint32_t)(pixels[ofs ]) << 16) |
|
|
((uint32_t)(pixels[ofs + 1]) << 8) );
|
|
}
|
|
|
|
return 0; // Pixel # is out of bounds
|
|
}
|
|
|
|
uint16_t Adafruit_NeoPixel::numPixels(void) {
|
|
return numLEDs;
|
|
}
|
|
|
|
// Adjust output brightness; 0=darkest (off), 255=brightest. This does
|
|
// NOT immediately affect what's currently displayed on the LEDs. The
|
|
// next call to show() will refresh the LEDs at this level. However,
|
|
// this process is potentially "lossy," especially when increasing
|
|
// brightness. The tight timing in the WS2811/WS2812 code means there
|
|
// aren't enough free cycles to perform this scaling on the fly as data
|
|
// is issued. So we make a pass through the existing color data in RAM
|
|
// and scale it (subsequent graphics commands also work at this
|
|
// brightness level). If there's a significant step up in brightness,
|
|
// the limited number of steps (quantization) in the old data will be
|
|
// quite visible in the re-scaled version. For a non-destructive
|
|
// change, you'll need to re-render the full strip data. C'est la vie.
|
|
void Adafruit_NeoPixel::setBrightness(uint8_t b) {
|
|
// Stored brightness value is different than what's passed.
|
|
// This simplifies the actual scaling math later, allowing a fast
|
|
// 8x8-bit multiply and taking the MSB. 'brightness' is a uint8_t,
|
|
// adding 1 here may (intentionally) roll over...so 0 = max brightness
|
|
// (color values are interpreted literally; no scaling), 1 = min
|
|
// brightness (off), 255 = just below max brightness.
|
|
uint8_t newBrightness = b + 1;
|
|
if(newBrightness != brightness) { // Compare against prior value
|
|
// Brightness has changed -- re-scale existing data in RAM
|
|
uint8_t c,
|
|
*ptr = pixels,
|
|
oldBrightness = brightness - 1; // De-wrap old brightness value
|
|
uint16_t scale;
|
|
if(oldBrightness == 0) scale = 0; // Avoid /0
|
|
else if(b == 255) scale = 65535 / oldBrightness;
|
|
else scale = (((uint16_t)newBrightness << 8) - 1) / oldBrightness;
|
|
for(uint16_t i=0; i<numBytes; i++) {
|
|
c = *ptr;
|
|
*ptr++ = (c * scale) >> 8;
|
|
}
|
|
brightness = newBrightness;
|
|
}
|
|
}
|