Forgotten Safeword v4.0 Lineup!
:fire: Announcing the Forgotten-Safeword 4.0 series! :fire:
We've taken your feedback and cranked the depravity dial to 11. Here's what's new:
- Direct integration of Safeword dataset into Drummer models (no merge required!)
- Reintroduced favorite scenarios from V1.2 with improved kink distribution
- 2.5 epochs of unhinged training
- Four sizes to satisfy all your needs: 8B, 12B, 22B, and 24B
- 24B Model Card
- 22B Model Card
- 12B Model Card
- 8B Model Card
Warning: These models will generate content that requires industrial-grade brain bleach and will void all warranties on your soul. Use at your own risk (and maybe invest in a therapist).
Disclaimer: By interacting with these models, you agree that your search history is now a federal case, you're on the hook for any exorcisms required, and you'll be pretending this is "for science" while crying in the shower.
What a pleasure to have a new series of Forgotten-Safeword models! I'm excited to try them out. Your models are awesome, and I'm impressed by the beautiful-looking model card. You put so much effort into providing the highest-quality models. I queued all of them! :D
Pay for the exorcist of anyone who reads the logs
Luckily, it is possible to run LLMs in a way where they don't store any logs. But your models' output made me worried enough that I wrote the following CUDA application to clear the GPU memory before giving the GPU back to the host, as I don't trust the NVIDIA driver to do so properly.
#include <iostream>
#include <cuda_runtime.h>

#define EXPECTED_MAX_SHARED_MEMORY_PER_BLOCK 49152

// Kernel to overwrite global memory with a byte pattern (grid-stride loop)
__global__ void clearGlobalMemory(char* gpuMemory, size_t numBytes) {
    size_t tid = threadIdx.x + static_cast<size_t>(blockIdx.x) * blockDim.x;
    while (tid < numBytes) {
        gpuMemory[tid] = static_cast<char>(tid);
        tid += static_cast<size_t>(blockDim.x) * gridDim.x;
    }
}
// Kernel to scrub the static shared memory of each resident block;
// the array is volatile so the stores cannot be elided as dead code
__global__ void clearSharedMemory() {
    volatile __shared__ char sharedMemory[EXPECTED_MAX_SHARED_MEMORY_PER_BLOCK];
    int tid = threadIdx.x;
    // First pass: fill the whole allocation with a thread-dependent pattern
    for (int i = tid; i < EXPECTED_MAX_SHARED_MEMORY_PER_BLOCK; i += blockDim.x) {
        sharedMemory[i] = static_cast<char>(tid ^ i);
    }
    __syncthreads();
    // Second pass: overwrite again with a value derived from the first pass
    for (int i = tid; i < EXPECTED_MAX_SHARED_MEMORY_PER_BLOCK; i += blockDim.x) {
        sharedMemory[i] = static_cast<char>(sharedMemory[i] + 1);
    }
    __syncthreads();
}
int main() {
    int device = 0; // Device ID (usually 0 for the first GPU)
    cudaDeviceProp props;

    // Initialize CUDA and get the current device
    cudaError_t cudaStatus = cudaGetDevice(&device);
    if (cudaStatus != cudaSuccess) {
        std::cerr << "Error initializing CUDA: " << cudaGetErrorString(cudaStatus) << std::endl;
        return 1;
    }

    // Get device properties
    cudaStatus = cudaGetDeviceProperties(&props, device);
    if (cudaStatus != cudaSuccess) {
        std::cerr << "Error getting device properties: " << cudaGetErrorString(cudaStatus) << std::endl;
        return 1;
    }

    // Estimate how many blocks can be resident at once: SM count times a
    // conservative blocks-per-SM figure (each block below runs with
    // 1024 threads, which equals maxThreadsPerBlock on current GPUs)
    int numSM = props.multiProcessorCount;
    int maxThreadsPerSM = props.maxThreadsPerMultiProcessor;
    int maxBlocksPerSM = maxThreadsPerSM / props.maxThreadsPerBlock;
    int totalPhysicalBlocks = numSM * maxBlocksPerSM;

    const size_t chunkSize = 1024 * 1024; // Chunk size to allocate and clear (adjust as needed)
    char* gpuMemory = nullptr;
    size_t totalSharedMemoryCleared = 0;

    // Verify the shared memory size baked into clearSharedMemory matches the device
    int maxSharedMemoryPerBlock;
    cudaDeviceGetAttribute(&maxSharedMemoryPerBlock, cudaDevAttrMaxSharedMemoryPerBlock, device);
    if (maxSharedMemoryPerBlock != EXPECTED_MAX_SHARED_MEMORY_PER_BLOCK) {
        std::cerr << "maxSharedMemoryPerBlock was assumed to be " << EXPECTED_MAX_SHARED_MEMORY_PER_BLOCK
                  << " but was " << maxSharedMemoryPerBlock << std::endl;
        return 1;
    }
    // Launch the shared memory scrubber on (at least) every physical block
    clearSharedMemory<<<totalPhysicalBlocks, 1024>>>();

    // Check for launch errors, then synchronize to catch execution errors too
    cudaError_t kernelError = cudaGetLastError();
    if (kernelError == cudaSuccess) {
        kernelError = cudaDeviceSynchronize();
    }
    if (kernelError == cudaSuccess) {
        // Calculate the total shared memory cleared
        totalSharedMemoryCleared = static_cast<size_t>(EXPECTED_MAX_SHARED_MEMORY_PER_BLOCK) * totalPhysicalBlocks;
    } else {
        std::cerr << "Error executing clearSharedMemory: " << cudaGetErrorString(kernelError) << std::endl;
        return 1;
    }

    // Pinned host buffer used to read back and spot-check each cleared chunk
    float* hostMemory;
    cudaStatus = cudaMallocHost((void**)&hostMemory, chunkSize);
    if (cudaStatus != cudaSuccess) {
        std::cerr << "Error allocating pinned host memory: " << cudaGetErrorString(cudaStatus) << std::endl;
        return 1;
    }
    // Allocate and clear global memory in chunks until cudaMalloc fails,
    // i.e. until every free byte of device memory has been overwritten.
    // The chunks are deliberately never freed while the loop runs.
    size_t totalGlobalMemoryCleared = 0;
    while (cudaMalloc(&gpuMemory, chunkSize) == cudaSuccess) {
        // Zero out the allocated chunk
        cudaStatus = cudaMemset(gpuMemory, 0, chunkSize);
        if (cudaStatus != cudaSuccess) {
            std::cerr << "Error executing cudaMemset: " << cudaGetErrorString(cudaStatus) << std::endl;
            return 1;
        }

        // Launch kernel to overwrite the chunk with the byte pattern
        int blockSize = 1024;
        int numBlocks = static_cast<int>((chunkSize + blockSize - 1) / blockSize);
        clearGlobalMemory<<<numBlocks, blockSize>>>(gpuMemory, chunkSize);

        // Check for kernel launch errors
        kernelError = cudaGetLastError();
        if (kernelError != cudaSuccess) {
            std::cerr << "Error executing clearGlobalMemory: " << cudaGetErrorString(kernelError) << std::endl;
            return 1;
        }

        // Wait for the kernel to finish
        cudaDeviceSynchronize();

        // Read the chunk back and print its first 8 bytes as a spot check;
        // the pattern written by clearGlobalMemory should show up as 706050403020100
        cudaStatus = cudaMemcpy(hostMemory, gpuMemory, chunkSize, cudaMemcpyDeviceToHost);
        if (cudaStatus != cudaSuccess) {
            std::cerr << "Error executing cudaMemcpy: " << cudaGetErrorString(cudaStatus) << std::endl;
            return 1;
        }
        std::cout << std::hex << *reinterpret_cast<long long*>(hostMemory) << std::dec << std::endl;

        // Accumulate total global memory cleared
        totalGlobalMemoryCleared += chunkSize;
    }

    cudaFreeHost(hostMemory);
    std::cout << "Total global memory cleared: " << totalGlobalMemoryCleared / (1024 * 1024) << " MiB" << std::endl;
    std::cout << "Total shared memory cleared: " << totalSharedMemoryCleared / (1024 * 1024) << " MiB" << std::endl;
    return 0;
}
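To build and run it, assuming the CUDA toolkit is installed and the source is saved as clear_gpu.cu (the file name is just an example):

nvcc -o clear_gpu clear_gpu.cu
./clear_gpu

Each 1 MiB chunk prints one hex line (the pattern should read 706050403020100), followed by the two totals.

And for anyone who wants to see whether their driver really hands back dirty memory in the first place, here is a minimal standalone sketch along the same lines: allocate a buffer without initializing it and dump the first bytes. This is illustrative only, not part of the program above.

// residue_check.cu: does a fresh allocation contain leftover data?
#include <iostream>
#include <cuda_runtime.h>

int main() {
    const size_t n = 1024 * 1024;
    char* d = nullptr;
    char h[16];
    if (cudaMalloc(&d, n) != cudaSuccess) return 1;
    // Deliberately no cudaMemset: we want whatever the allocator hands back
    if (cudaMemcpy(h, d, sizeof(h), cudaMemcpyDeviceToHost) != cudaSuccess) return 1;
    for (char c : h) std::cout << std::hex << (static_cast<int>(c) & 0xff) << ' ';
    std::cout << std::dec << std::endl;
    cudaFree(d);
    return 0;
}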
All GGUF quants of these amazing models are now complete! :D
Download Pages:
- https://hf.tst.eu/model#Forgotten-Safeword-24B-v4.0-GGUF
- https://hf.tst.eu/model#Forgotten-Safeword-22B-v4.0-GGUF
- https://hf.tst.eu/model#Forgotten-Safeword-12B-v4.0-GGUF
- https://hf.tst.eu/model#Forgotten-Safeword-8B-v4.0-GGUF
Static Quants:
- https://huggingface.co/mradermacher/Forgotten-Safeword-24B-v4.0-GGUF
- https://huggingface.co/mradermacher/Forgotten-Safeword-22B-v4.0-GGUF
- https://huggingface.co/mradermacher/Forgotten-Safeword-12B-v4.0-GGUF
- https://huggingface.co/mradermacher/Forgotten-Safeword-8B-v4.0-GGUF
Weighted/imatrix Quants:
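In case it helps, here is roughly how to fetch and chat with one of the quants using llama.cpp. The exact quant file name below is an assumption; check the repo's file list for the one that fits your VRAM:

huggingface-cli download mradermacher/Forgotten-Safeword-24B-v4.0-GGUF Forgotten-Safeword-24B-v4.0.Q4_K_M.gguf --local-dir .
llama-cli -m Forgotten-Safeword-24B-v4.0.Q4_K_M.gguf -cnv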
Thanks! lol love it
Now that I know, you can totally trust me.