Files
2026-02-02 04:50:13 +01:00

7242 lines
269 KiB
C++

/* Copyright (c) <2003-2011> <Julio Jerez, Newton Game Dynamics>
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
*
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
*
* 3. This notice may not be removed or altered from any source distribution.
*/
#include "hpl1/engine/libraries/newton/core/dg.h"
#include "dgBody.h"
#include "dgConstraint.h"
#include "dgWorld.h"
#include "dgWorldDynamicUpdate.h"
#define DG_FREEZZING_VELOCITY_DRAG dgFloat32(0.9f)
#define DG_SOLVER_MAX_ERROR (DG_FREEZE_MAG * dgFloat32(0.5f))
#ifdef DG_BUILD_SIMD_CODE
#define DG_SIMD_WORD_SIZE dgInt32(sizeof(simd_type) / sizeof(dgFloat32))
#else
#define DG_SIMD_WORD_SIZE dgInt32(sizeof(dgVector) / sizeof(dgFloat32))
#endif
#define DG_PARALLEL_JOINT_COUNT 64
// in my twist to RK4 I am no sure if the final derivative have to be weighted, it seems a mistake, I need to investigate more
// uncomment this out for more stable behavior but no exactly correct solution on the iterative solver
//#define DG_WIGHT_FINAL_RK4_DERIVATIVES
#ifdef TARGET_OS_IPHONE
#define DG_BASE_ITERATION_COUNT 2
#else
#define DG_BASE_ITERATION_COUNT 3
#endif
class dgBodyInfo {
public:
dgBody *m_body;
};
class dgJointInfo {
public:
dgConstraint *m_joint;
dgInt32 m_autoPairstart;
dgInt32 m_autoPaircount;
dgInt32 m_autoPairActiveCount;
dgInt32 m_m0;
dgInt32 m_m1;
};
class dgIsland {
public:
dgInt32 m_bodyCount;
dgInt32 m_bodyStart;
dgInt32 m_jointCount;
dgInt32 m_jointStart;
dgInt32 m_hasUnilateralJoints : 1;
dgInt32 m_isContinueCollision : 1;
};
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
static inline dgInt32 CompareIslands(const dgIsland *const islandA,
const dgIsland *const islandB, void *notUsed) {
dgInt32 countA;
dgInt32 countB;
countA = islandA->m_jointCount - (islandA->m_hasUnilateralJoints << 23);
countB = islandB->m_jointCount - (islandB->m_hasUnilateralJoints << 23);
if (countA < countB) {
return -1;
}
if (countA > countB) {
return 1;
}
return 0;
}
dgBody *dgWorld::GetIslandBody(const void *const islandPtr, dgInt32 index) const {
const dgIslandCallbackStruct &island = *(const dgIslandCallbackStruct *)islandPtr;
char *ptr = &((char *)island.m_bodyArray)[island.m_strideInByte * index];
dgBody **bodyPtr = (dgBody **)ptr;
return (index < island.m_count) ? ((index >= 0) ? *bodyPtr : NULL) : NULL;
}
dgWorldDynamicUpdate::dgWorldDynamicUpdate() {
m_bodies = 0;
;
m_joints = 0;
m_islands = 0;
m_markLru = 0;
m_maxJointCount = 0;
m_maxBodiesCount = 0;
m_maxIslandCount = 0;
}
void dgWorldDynamicUpdate::UpdateDynamics(dgWorld *const world,
dgInt32 archModel, dgFloat32 timestep) {
dgUnsigned32 updateTime = world->m_getPerformanceCount();
m_bodies = 0;
m_joints = 0;
m_islands = 0;
m_markLru = 0;
m_world = world;
ReallocBodyMemory(0);
ReallocIslandMemory(0);
ReallocJointsMemory(0);
dgInt32 threadCounts = dgInt32(m_world->m_numberOfTheads);
for (dgInt32 i = 0; i < threadCounts; i++) {
m_solverMemory[i].m_world = m_world;
m_solverMemory[i].m_threadIndex = i;
ReallocJacobiansMemory(0, i);
ReallocIntenalForcesMemory(0, i);
}
m_world->m_dynamicsLru = m_world->m_dynamicsLru + 2;
m_markLru = dgInt32(m_world->m_dynamicsLru);
dgInt32 lru = m_markLru - 1;
dgBodyMasterList &me = *m_world;
NEWTON_ASSERT(me.GetFirst()->GetInfo().GetBody() == m_world->m_sentionelBody);
dgUnsigned32 solverMode = m_world->m_solverMode;
m_world->m_sentionelBody->m_index = 0;
m_world->m_sentionelBody->m_dynamicsLru = dgUnsigned32(m_markLru);
for (dgBodyMasterList::dgListNode *node = me.GetLast(); node;
node = node->GetPrev()) {
const dgBodyMasterListRow &graphNode = node->GetInfo();
dgBody *const body = graphNode.GetBody();
if (body->m_invMass.m_w == dgFloat32(0.0f)) {
#ifdef _DEBUG
for (; node; node = node->GetPrev()) {
NEWTON_ASSERT(node->GetInfo().GetBody()->m_invMass.m_w == dgFloat32(0.0f));
}
#endif
break;
}
if (dgInt32(body->m_dynamicsLru) < lru) {
if (!(body->m_freeze | body->m_spawnnedFromCallback | body->m_sleeping | !body->m_isInWorld)) {
SpanningTree(body);
}
}
body->m_spawnnedFromCallback = false;
}
dgSort(m_islandArray, m_islands, CompareIslands);
// dgRadixSort (m_islandArray, &m_islandArray[m_islands], m_islands, 3, GetIslandsKey);
dgUnsigned32 dynamicsTime = m_world->m_getPerformanceCount();
m_world->m_perfomanceCounters[m_dynamicsBuildSpanningTreeTicks] = dynamicsTime - updateTime;
if (threadCounts > 1) {
dgInt32 chunkSizes[DG_MAXIMUN_THREADS];
if (m_world->m_singleIslandMultithreading) {
const dgJacobianMemory &system = m_solverMemory[0];
while (m_islands && (m_islandArray[m_islands - 1].m_jointCount >= DG_PARALLEL_JOINT_COUNT)) {
m_islands--;
NEWTON_ASSERT(!m_islandArray[m_islands].m_isContinueCollision);
BuildJacobianMatrixParallel(m_islandArray[m_islands], timestep, archModel);
system.CalculateReactionsForcesParallel(dgInt32(solverMode), DG_SOLVER_MAX_ERROR, archModel);
IntegrateArray(&system.m_bodyArray[1], system.m_bodyCount - 1, DG_SOLVER_MAX_ERROR, timestep, 0, true);
}
}
if (m_islands) {
m_world->m_threadsManager.CalculateChunkSizes(m_islands, chunkSizes);
for (dgInt32 threadIndex = 0; threadIndex < threadCounts; threadIndex++) {
m_workerThreads[threadIndex].m_useSimd = archModel;
m_workerThreads[threadIndex].m_world = m_world;
m_workerThreads[threadIndex].m_dynamics = this;
m_workerThreads[threadIndex].m_count = chunkSizes[threadIndex] * threadCounts;
m_workerThreads[threadIndex].m_threads = threadCounts;
m_workerThreads[threadIndex].m_timestep = timestep;
m_workerThreads[threadIndex].m_solverMode = dgInt32(solverMode);
m_workerThreads[threadIndex].m_threadIndex = threadIndex;
m_workerThreads[threadIndex].m_system = &m_solverMemory[threadIndex];
m_world->m_threadsManager.SubmitJob(&m_workerThreads[threadIndex]);
}
m_world->m_threadsManager.SynchronizationBarrier();
}
} else {
m_workerThreads[0].m_threads = 1;
m_workerThreads[0].m_useSimd = archModel;
m_workerThreads[0].m_count = m_islands;
m_workerThreads[0].m_world = m_world;
m_workerThreads[0].m_dynamics = this;
m_workerThreads[0].m_threadIndex = 0;
m_workerThreads[0].m_solverMode = dgInt32(solverMode);
m_workerThreads[0].m_timestep = timestep;
m_workerThreads[0].m_system = &m_solverMemory[0];
m_workerThreads[0].ThreadExecute();
}
dgUnsigned32 ticks = m_world->m_getPerformanceCount();
m_world->m_perfomanceCounters[m_dynamicsSolveSpanningTreeTicks] = ticks - dynamicsTime;
m_world->m_perfomanceCounters[m_dynamicsTicks] = ticks - updateTime;
}
void dgSolverWorlkerThreads::ThreadExecute() {
const dgIsland *const m_islandArray = m_dynamics->m_islandArray;
dgContactPoint *const contactBuffer =
(dgContactPoint *)m_world->m_contactBuffers[m_threadIndex];
dgInt32 step = m_threads;
dgInt32 count = m_count;
if (m_useSimd) {
for (dgInt32 i = 0; i < count; i += step) {
const dgIsland &island = m_islandArray[i + m_threadIndex];
if (!island.m_isContinueCollision) {
m_dynamics->BuildJacobianMatrixSimd(island, m_threadIndex, m_timestep);
m_system->CalculateReactionsForcesSimd(m_solverMode,
DG_SOLVER_MAX_ERROR);
m_dynamics->IntegrateArray(&m_system->m_bodyArray[1],
m_system->m_bodyCount - 1, DG_SOLVER_MAX_ERROR, m_timestep,
m_threadIndex, true);
} else {
dgBodyInfo *const bodyArray =
&m_dynamics->m_bodyArray[island.m_bodyStart];
dgJointInfo *const constraintArray =
&m_dynamics->m_constraintArray[island.m_jointStart];
dgFloat32 dist = dgFloat32(0.0f);
dgFloat32 maxSpeed2 = dgFloat32(0.0f);
for (dgInt32 k = 1; k < island.m_bodyCount; k++) {
dgVector veloc;
dgVector omega;
dgBody *const body = bodyArray[k].m_body;
NEWTON_ASSERT(body->m_mass.m_w > dgFloat32(0.0f));
body->CalculateContinueVelocity(m_timestep, veloc, omega);
dgFloat32 mag2 = veloc % veloc;
if (mag2 > maxSpeed2) {
maxSpeed2 = mag2;
dist = body->m_collision->GetBoxMinRadius();
}
}
NEWTON_ASSERT(dist > dgFloat32(0.0f));
dgInt32 steps = dgInt32(
dgFloat32(4.0f) * dgSqrt(maxSpeed2) * m_timestep / dist) +
1;
dgFloat32 timestep = m_timestep / dgFloat32(steps);
if (steps > 8) {
steps = 8;
}
for (dgInt32 j = 0; j < steps - 1; j++) {
m_dynamics->BuildJacobianMatrixSimd(island, m_threadIndex, timestep);
m_system->CalculateReactionsForcesSimd(m_solverMode,
DG_SOLVER_MAX_ERROR);
m_dynamics->IntegrateArray(&m_system->m_bodyArray[1],
m_system->m_bodyCount - 1, DG_SOLVER_MAX_ERROR, timestep,
m_threadIndex, false);
for (dgInt32 k = 1; k < island.m_bodyCount; k++) {
dgBody *const body = bodyArray[k].m_body;
body->m_accel = body->m_prevExternalForce;
body->m_alpha = body->m_prevExternalTorque;
}
for (dgInt32 k = 0; k < island.m_jointCount; k++) {
dgConstraint *const constraint = constraintArray[k].m_joint;
if (constraint->GetId() == dgContactConstraintId) {
dgContact *const contact = (dgContact *)constraint;
const dgContactMaterial *const material =
contact->m_myCacheMaterial;
if (material->m_flags & dgContactMaterial::m_collisionEnable__) {
if (material->m_aabbOverlap) {
NEWTON_ASSERT(contact->m_body0);
NEWTON_ASSERT(contact->m_body1);
material->m_aabbOverlap(reinterpret_cast<const NewtonMaterial *>(material), reinterpret_cast<const NewtonBody *>(contact->m_body0),
reinterpret_cast<const NewtonBody *>(contact->m_body1), m_threadIndex);
}
dgCollidingPairCollector::dgPair pair;
pair.m_body0 = contact->m_body0;
pair.m_body1 = contact->m_body1;
pair.m_contact = contact;
pair.m_material = material;
pair.m_contactBuffer = contactBuffer;
m_world->CalculateContactsSimd(&pair, timestep, m_threadIndex);
//NEWTON_ASSERT (contact->m_maxDOF == 0);
NEWTON_ASSERT(pair.m_contact);
if (pair.m_contactCount) {
NEWTON_ASSERT(pair.m_contactCount <= (DG_CONSTRAINT_MAX_ROWS / 3));
m_world->ProcessContacts(&pair, timestep, m_threadIndex);
} else if (!pair.m_contactBuffer) {
m_world->ProcessCachedContacts(pair.m_contact,
pair.m_material, timestep, m_threadIndex);
}
}
}
}
}
m_dynamics->BuildJacobianMatrixSimd(island, m_threadIndex, timestep);
m_system->CalculateReactionsForcesSimd(m_solverMode,
DG_SOLVER_MAX_ERROR);
m_dynamics->IntegrateArray(&m_system->m_bodyArray[1],
m_system->m_bodyCount - 1, DG_SOLVER_MAX_ERROR, timestep,
m_threadIndex, true);
}
}
} else {
for (dgInt32 i = 0; i < count; i += step) {
const dgIsland &island = m_islandArray[i + m_threadIndex];
if (!island.m_isContinueCollision) {
m_dynamics->BuildJacobianMatrix(island, m_threadIndex, m_timestep);
m_system->CalculateReactionsForces(m_solverMode, DG_SOLVER_MAX_ERROR);
m_dynamics->IntegrateArray(&m_system->m_bodyArray[1],
m_system->m_bodyCount - 1, DG_SOLVER_MAX_ERROR, m_timestep,
m_threadIndex, true);
} else {
dgBodyInfo *const bodyArray =
&m_dynamics->m_bodyArray[island.m_bodyStart];
dgJointInfo *const constraintArray =
&m_dynamics->m_constraintArray[island.m_jointStart];
dgFloat32 dist = dgFloat32(0.0f);
dgFloat32 maxSpeed2 = dgFloat32(0.0f);
for (dgInt32 k = 1; k < island.m_bodyCount; k++) {
dgVector veloc;
dgVector omega;
dgBody *const body = bodyArray[k].m_body;
NEWTON_ASSERT(body->m_mass.m_w > dgFloat32(0.0f));
body->CalculateContinueVelocity(m_timestep, veloc, omega);
dgFloat32 mag2 = veloc % veloc;
if (mag2 > maxSpeed2) {
maxSpeed2 = mag2;
dist = body->m_collision->GetBoxMinRadius();
}
}
NEWTON_ASSERT(dist > dgFloat32(0.0f));
dgInt32 steps = dgInt32(
dgFloat32(4.0f) * dgSqrt(maxSpeed2) * m_timestep / dist) +
1;
dgFloat32 timestep = m_timestep / dgFloat32(steps);
if (steps > 8) {
steps = 8;
}
for (dgInt32 j = 0; j < steps - 1; j++) {
m_dynamics->BuildJacobianMatrix(island, m_threadIndex, timestep);
m_system->CalculateReactionsForces(m_solverMode, DG_SOLVER_MAX_ERROR);
m_dynamics->IntegrateArray(&m_system->m_bodyArray[1],
m_system->m_bodyCount - 1, DG_SOLVER_MAX_ERROR, timestep,
m_threadIndex, false);
for (dgInt32 k = 1; k < island.m_bodyCount; k++) {
dgBody *const body = bodyArray[k].m_body;
body->m_accel = body->m_prevExternalForce;
body->m_alpha = body->m_prevExternalTorque;
}
for (dgInt32 k = 0; k < island.m_jointCount; k++) {
dgConstraint *const constraint = constraintArray[k].m_joint;
if (constraint->GetId() == dgContactConstraintId) {
dgContact *const contact = (dgContact *)constraint;
const dgContactMaterial *const material =
contact->m_myCacheMaterial;
if (material->m_flags & dgContactMaterial::m_collisionEnable__) {
if (material->m_aabbOverlap) {
NEWTON_ASSERT(contact->m_body0);
NEWTON_ASSERT(contact->m_body1);
material->m_aabbOverlap(reinterpret_cast<const NewtonMaterial *>(material), reinterpret_cast<const NewtonBody *>(contact->m_body0),
reinterpret_cast<const NewtonBody *>(contact->m_body1), m_threadIndex);
}
dgCollidingPairCollector::dgPair pair;
pair.m_body0 = contact->m_body0;
pair.m_body1 = contact->m_body1;
pair.m_contact = contact;
pair.m_material = material;
pair.m_contactBuffer = contactBuffer;
m_world->CalculateContacts(&pair, timestep, m_threadIndex);
NEWTON_ASSERT(pair.m_contact);
if (pair.m_contactCount) {
NEWTON_ASSERT(pair.m_contactCount <= (DG_CONSTRAINT_MAX_ROWS / 3));
m_world->ProcessContacts(&pair, timestep, m_threadIndex);
} else if (!pair.m_contactBuffer) {
m_world->ProcessCachedContacts(pair.m_contact,
pair.m_material, timestep, m_threadIndex);
}
}
}
}
}
m_dynamics->BuildJacobianMatrix(island, m_threadIndex, timestep);
m_system->CalculateReactionsForces(m_solverMode, DG_SOLVER_MAX_ERROR);
m_dynamics->IntegrateArray(&m_system->m_bodyArray[1],
m_system->m_bodyCount - 1, DG_SOLVER_MAX_ERROR, timestep,
m_threadIndex, true);
}
}
}
}
void dgWorldDynamicUpdate::ReallocBodyMemory(dgInt32 bodyCount) {
void *memory;
dgInt32 stride;
dgInt32 newCount;
dgBodyInfo *bodyArray;
stride = sizeof(dgBodyInfo);
if (bodyCount) {
m_world->m_bodiesMemorySizeInBytes = m_world->m_bodiesMemorySizeInBytes * 2;
memory = m_world->GetAllocator()->MallocLow(
m_world->m_bodiesMemorySizeInBytes);
newCount = (m_world->m_bodiesMemorySizeInBytes / stride) & (-4);
bodyArray = (dgBodyInfo *)memory;
memcpy(bodyArray, m_bodyArray, bodyCount * sizeof(dgBodyInfo));
m_world->GetAllocator()->FreeLow(m_world->m_bodiesMemory);
} else {
memory = m_world->m_bodiesMemory;
newCount = (m_world->m_bodiesMemorySizeInBytes / stride) & (-4);
bodyArray = (dgBodyInfo *)memory;
}
m_maxBodiesCount = newCount;
m_world->m_bodiesMemory = memory;
m_bodyArray = bodyArray;
NEWTON_ASSERT((dgUnsigned64(m_bodyArray) & 0x0f) == 0);
}
void dgWorldDynamicUpdate::ReallocIslandMemory(dgInt32 islandCount) {
void *memory;
dgInt32 stride;
dgInt32 newCount;
dgIsland *islandArray;
stride = sizeof(dgIsland);
if (islandCount) {
m_world->m_islandMemorySizeInBytes = m_world->m_islandMemorySizeInBytes * 2;
memory = m_world->GetAllocator()->MallocLow(
m_world->m_islandMemorySizeInBytes);
newCount = (m_world->m_islandMemorySizeInBytes / stride) & (-4);
islandArray = (dgIsland *)memory;
memcpy(islandArray, m_islandArray, islandCount * sizeof(dgIsland));
m_world->GetAllocator()->FreeLow(m_world->m_islandMemory);
} else {
memory = m_world->m_islandMemory;
newCount = (m_world->m_islandMemorySizeInBytes / stride) & (-4);
islandArray = (dgIsland *)memory;
}
m_maxIslandCount = newCount;
m_world->m_islandMemory = memory;
m_islandArray = islandArray;
NEWTON_ASSERT((dgUnsigned64(m_islandArray) & 0x0f) == 0);
}
void dgWorldDynamicUpdate::ReallocJointsMemory(dgInt32 count) {
void *memory;
dgInt32 stride;
dgInt32 newCount;
dgJointInfo *constraintArray;
stride = sizeof(dgJointInfo);
if (count) {
m_world->m_jointsMemorySizeInBytes = m_world->m_jointsMemorySizeInBytes * 2;
memory = m_world->GetAllocator()->MallocLow(
m_world->m_jointsMemorySizeInBytes);
newCount = (m_world->m_jointsMemorySizeInBytes / stride) & (-4);
constraintArray = (dgJointInfo *)memory;
memcpy(constraintArray, m_constraintArray, count * sizeof(dgJointInfo));
m_world->GetAllocator()->FreeLow(m_world->m_jointsMemory);
} else {
memory = m_world->m_jointsMemory;
newCount = (m_world->m_jointsMemorySizeInBytes / stride) & (-4);
constraintArray = (dgJointInfo *)memory;
}
m_maxJointCount = newCount;
m_world->m_jointsMemory = memory;
m_constraintArray = constraintArray;
NEWTON_ASSERT((dgUnsigned64(m_constraintArray) & 0x0f) == 0);
}
void dgWorldDynamicUpdate::ReallocJacobiansMemory(dgInt32 count,
dgInt32 threadIndex) {
void *memory;
dgInt32 stride;
dgInt32 newCount;
dgJacobianPair *Jt;
dgJacobianPair *JMinv;
dgFloat32 *force;
dgFloat32 *accel;
dgFloat32 *deltaAccel;
dgFloat32 *deltaForce;
dgFloat32 *diagDamp;
dgFloat32 *invDJMinvJt;
dgFloat32 *restitution;
dgFloat32 *penetration;
dgFloat32 *coordenateAccel;
dgFloat32 *penetrationStiffness;
dgFloat32 *lowerBoundFrictionCoefficent;
dgFloat32 *upperBoundFrictionCoefficent;
dgFloat32 **forceFeedback;
dgInt32 *normalForceIndex;
dgInt32 *accelIsMotor;
stride = sizeof(dgJacobianPair) + // Jt
sizeof(dgJacobianPair) + // JtMinv
sizeof(dgFloat32) + // lowerBoundFrictionCoefficent
sizeof(dgFloat32) + // upperBoundFrictionCoefficent
sizeof(dgFloat32 *) + // forceFeedback
sizeof(dgFloat32) + // force
sizeof(dgFloat32) + // accel
sizeof(dgFloat32) + // deltaAccel
sizeof(dgFloat32) + // deltaForce
sizeof(dgFloat32) + // diagDamp
sizeof(dgFloat32) + // invDJMinvJt
sizeof(dgFloat32) + // penetration
sizeof(dgFloat32) + // restitution
sizeof(dgFloat32) + // coordinateAccel
sizeof(dgFloat32) + // penetrationStiffness
sizeof(dgInt32) + // normalForceIndex
sizeof(dgInt32) + // accelIsMotor;
0;
if (count) {
m_world->m_jacobiansMemorySizeInBytes[threadIndex] =
m_world->m_jacobiansMemorySizeInBytes[threadIndex] * 2;
memory = m_world->GetAllocator()->MallocLow(
m_world->m_jacobiansMemorySizeInBytes[threadIndex] + 64);
newCount = ((m_world->m_jacobiansMemorySizeInBytes[threadIndex] - 16) / stride) & (-8);
Jt = (dgJacobianPair *)memory;
JMinv = (dgJacobianPair *)&Jt[newCount];
force = (dgFloat32 *)&JMinv[newCount];
force += 4;
accel = (dgFloat32 *)&force[newCount];
;
deltaAccel = (dgFloat32 *)&accel[newCount];
deltaForce = (dgFloat32 *)&deltaAccel[newCount];
// forceStep = (dgFloat32*) &deltaForce [newCount];
diagDamp = (dgFloat32 *)&deltaForce[newCount];
coordenateAccel = (dgFloat32 *)&diagDamp[newCount];
restitution = (dgFloat32 *)&coordenateAccel[newCount];
penetration = (dgFloat32 *)&restitution[newCount];
penetrationStiffness = (dgFloat32 *)&penetration[newCount];
invDJMinvJt = (dgFloat32 *)&penetrationStiffness[newCount];
lowerBoundFrictionCoefficent = (dgFloat32 *)&invDJMinvJt[newCount];
upperBoundFrictionCoefficent =
(dgFloat32 *)&lowerBoundFrictionCoefficent[newCount];
forceFeedback = (dgFloat32 **)&upperBoundFrictionCoefficent[newCount];
normalForceIndex = (dgInt32 *)&forceFeedback[newCount];
accelIsMotor = (dgInt32 *)&normalForceIndex[newCount];
m_world->GetAllocator()->FreeLow(m_world->m_jacobiansMemory[threadIndex]);
} else {
memory = m_world->m_jacobiansMemory[threadIndex];
newCount = ((m_world->m_jacobiansMemorySizeInBytes[threadIndex] - 16) / stride) & (-8);
Jt = (dgJacobianPair *)memory;
JMinv = (dgJacobianPair *)&Jt[newCount];
force = (dgFloat32 *)&JMinv[newCount];
force += 4;
accel = (dgFloat32 *)&force[newCount];
;
deltaAccel = (dgFloat32 *)&accel[newCount];
deltaForce = (dgFloat32 *)&deltaAccel[newCount];
diagDamp = (dgFloat32 *)&deltaForce[newCount];
coordenateAccel = (dgFloat32 *)&diagDamp[newCount];
restitution = (dgFloat32 *)&coordenateAccel[newCount];
penetration = (dgFloat32 *)&restitution[newCount];
penetrationStiffness = (dgFloat32 *)&penetration[newCount];
invDJMinvJt = (dgFloat32 *)&penetrationStiffness[newCount];
lowerBoundFrictionCoefficent = (dgFloat32 *)&invDJMinvJt[newCount];
upperBoundFrictionCoefficent =
(dgFloat32 *)&lowerBoundFrictionCoefficent[newCount];
forceFeedback = (dgFloat32 **)&upperBoundFrictionCoefficent[newCount];
normalForceIndex = (dgInt32 *)&forceFeedback[newCount];
accelIsMotor = (dgInt32 *)&normalForceIndex[newCount];
}
m_solverMemory[threadIndex].m_maxJacobiansCount = newCount;
m_world->m_jacobiansMemory[threadIndex] = memory;
force[-1] = dgFloat32(1.0f);
force[-2] = dgFloat32(1.0f);
force[-3] = dgFloat32(1.0f);
force[-4] = dgFloat32(1.0f);
m_solverMemory[threadIndex].m_Jt = Jt;
m_solverMemory[threadIndex].m_JMinv = JMinv;
m_solverMemory[threadIndex].m_force = force;
m_solverMemory[threadIndex].m_accel = accel;
m_solverMemory[threadIndex].m_deltaForce = deltaForce;
m_solverMemory[threadIndex].m_deltaAccel = deltaAccel;
m_solverMemory[threadIndex].m_diagDamp = diagDamp;
m_solverMemory[threadIndex].m_invDJMinvJt = invDJMinvJt;
m_solverMemory[threadIndex].m_penetration = penetration;
m_solverMemory[threadIndex].m_restitution = restitution;
m_solverMemory[threadIndex].m_coordenateAccel = coordenateAccel;
m_solverMemory[threadIndex].m_penetrationStiffness = penetrationStiffness;
m_solverMemory[threadIndex].m_lowerBoundFrictionCoefficent =
lowerBoundFrictionCoefficent;
m_solverMemory[threadIndex].m_upperBoundFrictionCoefficent =
upperBoundFrictionCoefficent;
m_solverMemory[threadIndex].m_jointFeebackForce = forceFeedback;
m_solverMemory[threadIndex].m_normalForceIndex = normalForceIndex;
m_solverMemory[threadIndex].m_accelIsMotor = accelIsMotor;
NEWTON_ASSERT((dgUnsigned64(Jt) & 0x0f) == 0);
NEWTON_ASSERT((dgUnsigned64(JMinv) & 0x0f) == 0);
NEWTON_ASSERT((dgUnsigned64(force) & 0x0f) == 0);
NEWTON_ASSERT((dgUnsigned64(accel) & 0x0f) == 0);
NEWTON_ASSERT((dgUnsigned64(deltaAccel) & 0x0f) == 0);
NEWTON_ASSERT((dgUnsigned64(deltaForce) & 0x0f) == 0);
NEWTON_ASSERT((dgUnsigned64(diagDamp) & 0x0f) == 0);
NEWTON_ASSERT((dgUnsigned64(penetration) & 0x0f) == 0);
NEWTON_ASSERT((dgUnsigned64(restitution) & 0x0f) == 0);
NEWTON_ASSERT((dgUnsigned64(invDJMinvJt) & 0x0f) == 0);
NEWTON_ASSERT((dgUnsigned64(coordenateAccel) & 0x0f) == 0);
NEWTON_ASSERT((dgUnsigned64(penetrationStiffness) & 0x0f) == 0);
NEWTON_ASSERT((dgUnsigned64(lowerBoundFrictionCoefficent) & 0x0f) == 0);
NEWTON_ASSERT((dgUnsigned64(upperBoundFrictionCoefficent) & 0x0f) == 0);
NEWTON_ASSERT((dgUnsigned64(forceFeedback) & 0x0f) == 0);
NEWTON_ASSERT((dgUnsigned64(normalForceIndex) & 0x0f) == 0);
NEWTON_ASSERT((dgUnsigned64(accelIsMotor) & 0x0f) == 0);
}
void dgWorldDynamicUpdate::ReallocIntenalForcesMemory(dgInt32 count,
dgInt32 threadIndex) {
dgInt32 stride;
dgInt32 newCount;
void *memory;
dgInt32 *treadLocks;
dgJacobian *internalForces;
dgJacobian *internalVeloc;
stride = sizeof(dgJacobian) + sizeof(dgJacobian) + sizeof(dgInt32) + 0;
if (count) {
m_world->m_internalForcesMemorySizeInBytes[threadIndex] =
m_world->m_internalForcesMemorySizeInBytes[threadIndex] * 2;
memory = m_world->GetAllocator()->MallocLow(
m_world->m_internalForcesMemorySizeInBytes[threadIndex]);
// newCount = m_world->m_internalForcesMemorySizeInBytes[threadIndex] / stride;
newCount = ((m_world->m_internalForcesMemorySizeInBytes[threadIndex] - 16) / stride) & (-8);
internalForces = (dgJacobian *)memory;
internalVeloc = &internalForces[newCount];
treadLocks = (dgInt32 *)&internalVeloc[newCount];
m_world->GetAllocator()->FreeLow(
m_world->m_internalForcesMemory[threadIndex]);
} else {
memory = m_world->m_internalForcesMemory[threadIndex];
newCount = ((m_world->m_internalForcesMemorySizeInBytes[threadIndex] - 16) / stride) & (-8);
internalForces = (dgJacobian *)memory;
internalVeloc = &internalForces[newCount];
treadLocks = (dgInt32 *)&internalVeloc[newCount];
}
NEWTON_ASSERT(
(((dgInt8 *)&treadLocks[newCount - 1]) - ((dgInt8 *)memory)) < m_world->m_internalForcesMemorySizeInBytes[threadIndex]);
m_solverMemory[threadIndex].m_maxBodiesCount = newCount;
m_world->m_internalForcesMemory[threadIndex] = memory;
m_solverMemory[threadIndex].m_internalForces = internalForces;
m_solverMemory[threadIndex].m_internalVeloc = internalVeloc;
m_solverMemory[threadIndex].m_treadLocks = treadLocks;
memset(treadLocks, 0, newCount * sizeof(dgInt32));
// m_solverMemory[threadIndex].m_bodyFreeze = bodyFreeze;
NEWTON_ASSERT((dgUnsigned64(internalForces) & 0x0f) == 0);
NEWTON_ASSERT((dgUnsigned64(internalVeloc) & 0x0f) == 0);
}
void dgWorldDynamicUpdate::BuildIsland(dgQueue<dgBody *> &queue,
dgInt32 jointCount, dgInt32 hasUnilateralJoints,
dgInt32 isContinueCollisionIsland) {
dgInt32 bodyCount = 1;
dgInt32 lruMark = m_markLru;
if (m_bodies >= m_maxBodiesCount) {
ReallocBodyMemory(m_bodies);
}
m_bodyArray[m_bodies].m_body = m_world->m_sentionelBody;
NEWTON_ASSERT(m_world->m_sentionelBody->m_index == 0);
NEWTON_ASSERT(dgInt32(m_world->m_sentionelBody->m_dynamicsLru) == m_markLru);
while (!queue.IsEmpty()) {
dgInt32 count = queue.m_firstIndex - queue.m_lastIndex;
if (count < 0) {
NEWTON_ASSERT(0);
count += queue.m_mod;
}
dgInt32 index = queue.m_lastIndex;
queue.Reset();
for (dgInt32 j = 0; j < count; j++) {
dgBody *const body = queue.m_pool[index];
NEWTON_ASSERT(body);
NEWTON_ASSERT(dgInt32(body->m_dynamicsLru) == lruMark);
NEWTON_ASSERT(body->m_masterNode);
if (body->m_invMass.m_w > dgFloat32(0.0f)) {
dgInt32 bodyIndex = m_bodies + bodyCount;
if (bodyIndex >= m_maxBodiesCount) {
ReallocBodyMemory(bodyIndex);
}
body->m_index = bodyCount;
m_bodyArray[bodyIndex].m_body = body;
bodyCount++;
}
for (dgBodyMasterListRow::dgListNode *jointNode =
body->m_masterNode->GetInfo().GetFirst();
jointNode; jointNode =
jointNode->GetNext()) {
dgBodyMasterListCell &cell = jointNode->GetInfo();
dgBody *const bodyN = cell.m_bodyNode;
dgConstraint *const constraint = cell.m_joint;
if (dgInt32(constraint->m_dynamicsLru) != lruMark) {
constraint->m_dynamicsLru = dgUnsigned32(lruMark);
dgInt32 jointIndex = m_joints + jointCount;
if (jointIndex >= m_maxJointCount) {
ReallocJointsMemory(jointIndex);
}
if (constraint->m_isUnilateral) {
hasUnilateralJoints = 1;
NEWTON_ASSERT(
(constraint->m_body0 == m_world->m_sentionelBody) || (constraint->m_body1 == m_world->m_sentionelBody));
}
constraint->m_index = dgUnsigned32(jointCount);
m_constraintArray[jointIndex].m_joint = constraint;
jointCount++;
NEWTON_ASSERT(constraint->m_body0);
NEWTON_ASSERT(constraint->m_body1);
}
if (dgInt32(bodyN->m_dynamicsLru) != lruMark) {
if (bodyN->m_invMass.m_w > dgFloat32(0.0f)) {
queue.Insert(bodyN);
bodyN->m_dynamicsLru = dgUnsigned32(lruMark);
}
}
}
index++;
if (index >= queue.m_mod) {
NEWTON_ASSERT(0);
index = 0;
}
}
}
if (bodyCount > 1) {
if (m_islands >= m_maxIslandCount) {
ReallocIslandMemory(m_islands);
}
m_islandArray[m_islands].m_bodyStart = m_bodies;
m_islandArray[m_islands].m_jointStart = m_joints;
m_islandArray[m_islands].m_bodyCount = bodyCount;
m_islandArray[m_islands].m_jointCount = jointCount;
m_islandArray[m_islands].m_hasUnilateralJoints = hasUnilateralJoints;
m_islandArray[m_islands].m_isContinueCollision = isContinueCollisionIsland;
m_islands++;
m_bodies += bodyCount;
m_joints += jointCount;
}
}
void dgWorldDynamicUpdate::SpanningTree(dgBody *const body) {
dgInt32 bodyCount = 0;
dgInt32 jointCount = 0;
dgInt32 staticCount = 0;
dgInt32 lruMark = m_markLru - 1;
dgInt32 isInWorld = 0;
dgInt32 isInEquilibrium = 1;
dgInt32 hasUnilateralJoints = 0;
dgInt32 isContinueCollisionIsland = 0;
dgBody *heaviestBody = NULL;
dgFloat32 haviestMass = dgFloat32(0.0f);
dgQueue<dgBody *> queue(
(dgBody **)m_world->m_pairMemoryBuffer,
(dgInt32(m_world->m_pairMemoryBufferSizeInBytes >> 1)) / dgInt32(sizeof(void *)));
dgBody **const staticPool = &queue.m_pool[queue.m_mod];
body->m_dynamicsLru = dgUnsigned32(lruMark);
queue.Insert(body);
while (!queue.IsEmpty()) {
dgInt32 count = queue.m_firstIndex - queue.m_lastIndex;
if (count < 0) {
NEWTON_ASSERT(0);
count += queue.m_mod;
}
dgInt32 index = queue.m_lastIndex;
queue.Reset();
for (dgInt32 j = 0; j < count; j++) {
dgBody *const srcBody = queue.m_pool[index];
NEWTON_ASSERT(srcBody);
NEWTON_ASSERT(srcBody->m_invMass.m_w > dgFloat32(0.0f));
NEWTON_ASSERT(dgInt32(srcBody->m_dynamicsLru) == lruMark);
NEWTON_ASSERT(srcBody->m_masterNode);
dgInt32 bodyIndex = m_bodies + bodyCount;
if (bodyIndex >= m_maxBodiesCount) {
ReallocBodyMemory(bodyIndex);
}
m_bodyArray[bodyIndex].m_body = srcBody;
isInWorld |= srcBody->m_isInWorld;
isInEquilibrium &= srcBody->m_equilibrium;
isInEquilibrium &= srcBody->m_autoSleep;
// isInEquilibrium = 0;
srcBody->m_sleeping = false;
isContinueCollisionIsland |= srcBody->m_solverInContinueCollision;
if (srcBody->m_mass.m_w > haviestMass) {
haviestMass = srcBody->m_mass.m_w;
heaviestBody = srcBody;
}
bodyCount++;
for (dgBodyMasterListRow::dgListNode *jointNode =
srcBody->m_masterNode->GetInfo().GetFirst();
jointNode; jointNode =
jointNode->GetNext()) {
dgBodyMasterListCell &cell = jointNode->GetInfo();
dgBody *const bodyN = cell.m_bodyNode;
if (dgInt32(bodyN->m_dynamicsLru) < lruMark) {
NEWTON_ASSERT(bodyN == cell.m_bodyNode);
bodyN->m_dynamicsLru = dgUnsigned32(lruMark);
if (bodyN->m_invMass.m_w > dgFloat32(0.0f)) {
queue.Insert(bodyN);
} else {
dgInt32 duplicateBody = 0;
for (; duplicateBody < staticCount; duplicateBody++) {
if (staticPool[duplicateBody] == srcBody) {
break;
}
}
if (duplicateBody == staticCount) {
staticPool[staticCount] = srcBody;
staticCount++;
NEWTON_ASSERT(srcBody->m_invMass.m_w > dgFloat32(0.0f));
}
dgConstraint *const constraint = cell.m_joint;
NEWTON_ASSERT(dgInt32(constraint->m_dynamicsLru) != m_markLru);
dgInt32 jointIndex = m_joints + jointCount;
if (jointIndex >= m_maxJointCount) {
ReallocJointsMemory(jointIndex);
}
if (constraint->m_isUnilateral) {
hasUnilateralJoints = 1;
NEWTON_ASSERT(
(constraint->m_body0 == m_world->m_sentionelBody) || (constraint->m_body1 == m_world->m_sentionelBody));
}
constraint->m_index = dgUnsigned32(jointCount);
m_constraintArray[jointIndex].m_joint = constraint;
jointCount++;
NEWTON_ASSERT(constraint->m_body0);
NEWTON_ASSERT(constraint->m_body1);
}
} else if (cell.m_bodyNode->m_invMass.m_w == dgFloat32(0.0f)) {
dgInt32 duplicateBody = 0;
for (; duplicateBody < staticCount; duplicateBody++) {
if (staticPool[duplicateBody] == srcBody) {
break;
}
}
if (duplicateBody == staticCount) {
staticPool[staticCount] = srcBody;
staticCount++;
NEWTON_ASSERT(srcBody->m_invMass.m_w > dgFloat32(0.0f));
}
dgConstraint *const constraint = cell.m_joint;
NEWTON_ASSERT(dgInt32(constraint->m_dynamicsLru) != m_markLru);
dgInt32 jointIndex = m_joints + jointCount;
if (jointIndex >= m_maxJointCount) {
ReallocJointsMemory(jointIndex);
}
if (constraint->m_isUnilateral) {
hasUnilateralJoints = 1;
NEWTON_ASSERT(
(constraint->m_body0 == m_world->m_sentionelBody) || (constraint->m_body1 == m_world->m_sentionelBody));
}
constraint->m_index = dgUnsigned32(jointCount);
m_constraintArray[jointIndex].m_joint = constraint;
jointCount++;
NEWTON_ASSERT(constraint->m_body0);
NEWTON_ASSERT(constraint->m_body1);
}
}
index++;
if (index >= queue.m_mod) {
NEWTON_ASSERT(0);
index = 0;
}
}
}
if (isInEquilibrium | !isInWorld) {
for (dgInt32 i = 0; i < bodyCount; i++) {
dgBody *const bodyI = m_bodyArray[m_bodies + i].m_body;
bodyI->m_dynamicsLru = dgUnsigned32(m_markLru);
bodyI->m_sleeping = true;
}
} else {
if (m_world->m_islandUpdate) {
dgIslandCallbackStruct record;
record.m_world = m_world;
record.m_count = bodyCount;
record.m_strideInByte = sizeof(dgBodyInfo);
record.m_bodyArray = &m_bodyArray[m_bodies].m_body;
if (!m_world->m_islandUpdate(m_world, &record, bodyCount)) {
for (dgInt32 i = 0; i < bodyCount; i++) {
dgBody *const bodyI = m_bodyArray[m_bodies + i].m_body;
bodyI->m_dynamicsLru = dgUnsigned32(m_markLru);
}
return;
}
}
if (staticCount) {
queue.Reset();
for (dgInt32 i = 0; i < staticCount; i++) {
dgBody *const bodyI = staticPool[i];
bodyI->m_dynamicsLru = dgUnsigned32(m_markLru);
queue.Insert(bodyI);
NEWTON_ASSERT(dgInt32(bodyI->m_dynamicsLru) == m_markLru);
}
for (dgInt32 i = 0; i < jointCount; i++) {
dgConstraint *const constraint = m_constraintArray[m_joints + i].m_joint;
constraint->m_dynamicsLru = dgUnsigned32(m_markLru);
}
} else {
NEWTON_ASSERT(heaviestBody);
queue.Insert(heaviestBody);
// body->m_dynamicsLru = m_markLru;
heaviestBody->m_dynamicsLru = dgUnsigned32(m_markLru);
}
BuildIsland(queue, jointCount, hasUnilateralJoints,
isContinueCollisionIsland);
}
}
void dgWorldDynamicUpdate::IntegrateArray(const dgBodyInfo *bodyArray,
dgInt32 count, dgFloat32 accelTolerance, dgFloat32 timestep,
dgInt32 threadIndex, bool update) const {
bool isAutoSleep = true;
bool stackSleeping = true;
dgInt32 sleepCounter = 10000;
dgFloat32 speedFreeze = m_world->m_freezeSpeed2;
dgFloat32 accelFreeze = m_world->m_freezeAccel2;
dgFloat32 maxAccel = dgFloat32(0.0f);
dgFloat32 maxAlpha = dgFloat32(0.0f);
dgFloat32 maxSpeed = dgFloat32(0.0f);
dgFloat32 maxOmega = dgFloat32(0.0f);
dgVector zero(dgFloat32(0.0f), dgFloat32(0.0f), dgFloat32(0.0f),
dgFloat32(0.0f));
dgFloat32 forceDamp = DG_FREEZZING_VELOCITY_DRAG;
if (count <= 2) {
bool autosleep = bodyArray[0].m_body->m_autoSleep;
if (count == 2) {
autosleep &= bodyArray[1].m_body->m_autoSleep;
}
if (!autosleep) {
forceDamp = dgFloat32(0.9999f);
}
}
for (dgInt32 i = 0; i < count; i++) {
dgBody *const body = bodyArray[i].m_body;
if (body->m_invMass.m_w && body->m_isInWorld) {
body->IntegrateVelocity(timestep);
dgFloat32 accel2 = body->m_accel % body->m_accel;
dgFloat32 alpha2 = body->m_alpha % body->m_alpha;
dgFloat32 speed2 = body->m_veloc % body->m_veloc;
dgFloat32 omega2 = body->m_omega % body->m_omega;
maxAccel = GetMax(maxAccel, accel2);
maxAlpha = GetMax(maxAlpha, alpha2);
maxSpeed = GetMax(maxSpeed, speed2);
maxOmega = GetMax(maxOmega, omega2);
bool equilibrium = (accel2 < accelFreeze) && (alpha2 < accelFreeze) && (speed2 < speedFreeze) && (omega2 < speedFreeze);
if (equilibrium) {
body->m_veloc = body->m_veloc.Scale(forceDamp);
body->m_omega = body->m_omega.Scale(forceDamp);
}
body->m_equilibrium = dgUnsigned32(equilibrium);
stackSleeping &= equilibrium;
isAutoSleep &= body->m_autoSleep;
sleepCounter = GetMin(sleepCounter, body->m_sleepingCounter);
}
}
if (update) {
for (dgInt32 i = 0; i < count; i++) {
dgBody *const body = bodyArray[i].m_body;
if (body->m_invMass.m_w && body->m_isInWorld) {
body->UpdateMatrix(timestep, threadIndex);
}
}
if (isAutoSleep) {
if (stackSleeping) {
for (dgInt32 i = 0; i < count; i++) {
dgBody *const body = bodyArray[i].m_body;
body->m_netForce = zero;
body->m_netTorque = zero;
body->m_veloc = zero;
body->m_omega = zero;
}
} else {
if ((maxAccel > m_world->m_sleepTable[DG_SLEEP_ENTRIES - 1].m_maxAccel) || (maxAlpha > m_world->m_sleepTable[DG_SLEEP_ENTRIES - 1].m_maxAlpha) || (maxSpeed > m_world->m_sleepTable[DG_SLEEP_ENTRIES - 1].m_maxVeloc) || (maxOmega > m_world->m_sleepTable[DG_SLEEP_ENTRIES - 1].m_maxOmega)) {
for (dgInt32 i = 0; i < count; i++) {
dgBody *const body = bodyArray[i].m_body;
body->m_sleepingCounter = 0;
}
} else {
dgInt32 index = 0;
for (dgInt32 i = 0; i < DG_SLEEP_ENTRIES; i++) {
if ((maxAccel <= m_world->m_sleepTable[i].m_maxAccel) && (maxAlpha <= m_world->m_sleepTable[i].m_maxAlpha) && (maxSpeed <= m_world->m_sleepTable[i].m_maxVeloc) && (maxOmega <= m_world->m_sleepTable[i].m_maxOmega)) {
index = i;
break;
}
}
dgInt32 timeScaleSleepCount = dgInt32(
dgFloat32(60.0f) * sleepCounter * timestep);
if (timeScaleSleepCount > m_world->m_sleepTable[index].m_steps) {
for (dgInt32 i = 0; i < count; i++) {
dgBody *const body = bodyArray[i].m_body;
body->m_netForce = zero;
body->m_netTorque = zero;
body->m_veloc = zero;
body->m_omega = zero;
body->m_equilibrium = true;
}
} else {
sleepCounter++;
for (dgInt32 i = 0; i < count; i++) {
dgBody *const body = bodyArray[i].m_body;
body->m_sleepingCounter = sleepCounter;
}
}
}
}
}
}
}
dgInt32 dgWorldDynamicUpdate::GetJacobialDerivatives(const dgIsland &island,
dgInt32 threadIndex, bool bitMode, dgInt32 rowCount, dgFloat32 timestep) {
dgContraintDescritor constraintParams;
dgInt32 jointCount = island.m_jointCount;
constraintParams.m_world = m_world;
constraintParams.m_threadIndex = threadIndex;
constraintParams.m_timestep = timestep;
constraintParams.m_invTimestep = dgFloat32(1.0f / timestep);
dgJacobianMemory &solverMemory = m_solverMemory[threadIndex];
dgJointInfo *const constraintArray = &m_constraintArray[island.m_jointStart];
for (dgInt32 j = 0; j < jointCount; j++) {
dgConstraint *const constraint = constraintArray[j].m_joint;
;
if (constraint->m_isUnilateral ^ bitMode) {
dgInt32 dof = dgInt32(constraint->m_maxDOF);
NEWTON_ASSERT(dof <= DG_CONSTRAINT_MAX_ROWS);
for (dgInt32 i = 0; i < dof; i++) {
constraintParams.m_forceBounds[i].m_low = DG_MIN_BOUND;
constraintParams.m_forceBounds[i].m_upper = DG_MAX_BOUND;
constraintParams.m_forceBounds[i].m_jointForce = NULL;
constraintParams.m_forceBounds[i].m_normalIndex =
DG_BILATERAL_CONSTRAINT;
}
NEWTON_ASSERT(constraint->m_body0);
NEWTON_ASSERT(constraint->m_body1);
constraint->m_body0->m_inCallback = true;
constraint->m_body1->m_inCallback = true;
dof = dgInt32(constraint->JacobianDerivative(constraintParams));
constraint->m_body0->m_inCallback = false;
constraint->m_body1->m_inCallback = false;
dgInt32 m0 =
(constraint->m_body0->m_invMass.m_w != dgFloat32(0.0f)) ? constraint->m_body0->m_index : 0;
dgInt32 m1 =
(constraint->m_body1->m_invMass.m_w != dgFloat32(0.0f)) ? constraint->m_body1->m_index : 0;
NEWTON_ASSERT(constraint->m_index == dgUnsigned32(j));
constraintArray[j].m_autoPairstart = rowCount;
constraintArray[j].m_autoPaircount = dof;
constraintArray[j].m_autoPairActiveCount = dof;
constraintArray[j].m_m0 = m0;
constraintArray[j].m_m1 = m1;
for (dgInt32 i = 0; i < dof; i++) {
NEWTON_ASSERT(constraintParams.m_forceBounds[i].m_jointForce);
solverMemory.m_Jt[rowCount] = constraintParams.m_jacobian[i];
NEWTON_ASSERT(constraintParams.m_jointStiffness[i] >= dgFloat32(0.1f));
NEWTON_ASSERT(constraintParams.m_jointStiffness[i] <= dgFloat32(100.0f));
solverMemory.m_diagDamp[rowCount] =
constraintParams.m_jointStiffness[i];
solverMemory.m_coordenateAccel[rowCount] =
constraintParams.m_jointAccel[i];
solverMemory.m_accelIsMotor[rowCount] = dgInt32(
constraintParams.m_isMotor[i]);
solverMemory.m_restitution[rowCount] =
constraintParams.m_restitution[i];
solverMemory.m_penetration[rowCount] =
constraintParams.m_penetration[i];
solverMemory.m_penetrationStiffness[rowCount] =
constraintParams.m_penetrationStiffness[i];
solverMemory.m_lowerBoundFrictionCoefficent[rowCount] =
constraintParams.m_forceBounds[i].m_low;
solverMemory.m_upperBoundFrictionCoefficent[rowCount] =
constraintParams.m_forceBounds[i].m_upper;
solverMemory.m_jointFeebackForce[rowCount] =
constraintParams.m_forceBounds[i].m_jointForce;
solverMemory.m_normalForceIndex[rowCount] =
constraintParams.m_forceBounds[i].m_normalIndex + ((constraintParams.m_forceBounds[i].m_normalIndex >= 0) ? (rowCount - i) : 0);
rowCount++;
}
#ifdef _DEBUG
for (dgInt32 i = 0; i < ((rowCount + 3) & 0xfffc) - rowCount; i++) {
solverMemory.m_diagDamp[rowCount + i] = dgFloat32(0.0f);
solverMemory.m_coordenateAccel[rowCount + i] = dgFloat32(0.0f);
solverMemory.m_restitution[rowCount + i] = dgFloat32(0.0f);
solverMemory.m_penetration[rowCount + i] = dgFloat32(0.0f);
solverMemory.m_penetrationStiffness[rowCount + i] = dgFloat32(0.0f);
solverMemory.m_lowerBoundFrictionCoefficent[rowCount + i] = dgFloat32(
0.0f);
solverMemory.m_upperBoundFrictionCoefficent[rowCount + i] = dgFloat32(
0.0f);
solverMemory.m_jointFeebackForce[rowCount + i] = 0;
solverMemory.m_normalForceIndex[rowCount + i] = 0;
}
#endif
rowCount =
(rowCount & (DG_SIMD_WORD_SIZE - 1)) ? ((rowCount & (-DG_SIMD_WORD_SIZE)) + DG_SIMD_WORD_SIZE) : rowCount;
NEWTON_ASSERT((rowCount & (DG_SIMD_WORD_SIZE - 1)) == 0);
}
}
return rowCount;
}
void dgWorldDynamicUpdate::BuildJacobianMatrixSimd(const dgIsland &island,
dgInt32 threadIndex, dgFloat32 timestep) {
#ifdef DG_BUILD_SIMD_CODE
// dgInt32 threads;
// dgInt32 rowCount;
// dgInt32 bodyCount;
// dgInt32 jointCount;
// dgContraintDescritor constraintParams;
// dgVector zeroVector (dgFloat32(0.0f), dgFloat32(0.0f), dgFloat32(0.0f), dgFloat32(0.0f));
NEWTON_ASSERT(island.m_bodyCount >= 2);
//NEWTON_ASSERT (island.m_jointCount >= 1);
// dgInt32 threads = m_world->m_numberOfTheads;
dgJacobianMemory &solverMemory = m_solverMemory[threadIndex];
dgInt32 bodyCount = island.m_bodyCount;
dgBodyInfo *const bodyArray = &m_bodyArray[island.m_bodyStart];
bodyArray[0].m_body = m_world->GetSentinelBody();
// body = bodyArray[0].m_body;
// body->m_hasRelativeVelocity = false;
// body->m_accel = dgVector (dgFloat32 (0.0f), dgFloat32 (0.0f), dgFloat32 (0.0f), dgFloat32 (0.0f));
// body->m_alpha = dgVector (dgFloat32 (0.0f), dgFloat32 (0.0f), dgFloat32 (0.0f), dgFloat32 (0.0f));
NEWTON_ASSERT(
(bodyArray[0].m_body->m_accel % bodyArray[0].m_body->m_accel) == dgFloat32(0.0f));
NEWTON_ASSERT(
(bodyArray[0].m_body->m_alpha % bodyArray[0].m_body->m_alpha) == dgFloat32(0.0f));
for (dgInt32 i = 1; i < bodyCount; i++) {
dgBody *const body = bodyArray[i].m_body;
NEWTON_ASSERT(body->m_invMass.m_w > dgFloat32(0.0f));
// body->m_hasRelativeVelocity = false;
// body->AddGyroscopicTorque();
body->AddDamingAcceleration();
body->CalcInvInertiaMatrixSimd();
}
while (bodyCount >= solverMemory.m_maxBodiesCount) {
m_world->dgGetUserLock();
ReallocIntenalForcesMemory(bodyCount, threadIndex);
m_world->dgReleasedUserLock();
}
dgInt32 jointCount = island.m_jointCount;
// constraintArray = &m_constraintArray[island.m_jointStart];
dgJointInfo *const constraintArray = &m_constraintArray[island.m_jointStart];
solverMemory.m_constraintArray = constraintArray;
dgInt32 rowCount = 0;
dgFloat32 maxRowCount = 0;
for (dgInt32 j = 0; j < jointCount; j++) {
dgConstraint *const constraint = constraintArray[j].m_joint;
dgInt32 dof =
dgInt32(
(constraint->m_maxDOF & (DG_SIMD_WORD_SIZE - 1)) ? ((constraint->m_maxDOF & (-DG_SIMD_WORD_SIZE)) + DG_SIMD_WORD_SIZE) : constraint->m_maxDOF);
maxRowCount += dof;
}
while (maxRowCount > solverMemory.m_maxJacobiansCount) {
m_world->dgGetUserLock();
ReallocJacobiansMemory(solverMemory.m_maxJacobiansCount * 2, threadIndex);
m_world->dgReleasedUserLock();
}
if (island.m_hasUnilateralJoints) {
rowCount = GetJacobialDerivatives(island, threadIndex, false, rowCount,
timestep);
}
rowCount = GetJacobialDerivatives(island, threadIndex, true, rowCount,
timestep);
solverMemory.m_rowCount = rowCount;
solverMemory.m_bodyCount = bodyCount;
solverMemory.m_bodyArray = bodyArray;
solverMemory.m_jointCount = jointCount;
solverMemory.m_timeStep = timestep;
solverMemory.m_invTimeStep = dgFloat32(1.0f) / solverMemory.m_timeStep;
dgFloat32 *const force = solverMemory.m_force;
const dgJacobianPair *const Jt = solverMemory.m_Jt;
dgJacobianPair *const JMinv = solverMemory.m_JMinv;
dgFloat32 *const diagDamp = solverMemory.m_diagDamp;
dgFloat32 *const extAccel = solverMemory.m_deltaAccel;
dgFloat32 *const invDJMinvJt = solverMemory.m_invDJMinvJt;
dgFloat32 *const coordenateAccel = solverMemory.m_coordenateAccel;
dgFloat32 **const jointForceFeeback = solverMemory.m_jointFeebackForce;
// dgJacobianIndex* const jacobianIndexArray = solverMemory.m_jacobianIndexArray;
// dgBilateralBounds* const bilateralForceBounds = solverMemory.m_bilateralForceBounds;
simd_type zero;
zero = simd_set1(dgFloat32(0.0f));
for (dgInt32 k = 0; k < jointCount; k++) {
// dgInt32 m0;
// dgInt32 m1;
// dgInt32 index;
// dgInt32 count;
// dgFloat32 diag;
// dgFloat32 stiffness;
// simd_type tmp0;
// simd_type tmp1;
// simd_type invMass0;
// simd_type invMass1;
// simd_type tmpDiag;
// simd_type tmpAccel;
// dgBody* body0;
// dgBody* body1;
dgInt32 index = constraintArray[k].m_autoPairstart;
dgInt32 count = constraintArray[k].m_autoPaircount;
dgInt32 m0 = constraintArray[k].m_m0;
dgInt32 m1 = constraintArray[k].m_m1;
NEWTON_ASSERT(m0 >= 0);
NEWTON_ASSERT(m0 < bodyCount);
NEWTON_ASSERT(m1 >= 0);
NEWTON_ASSERT(m1 < bodyCount);
dgBody *const body0 = bodyArray[m0].m_body;
// invMass0 = body0->m_invMass[3];
simd_type invMass0 = simd_set1(body0->m_invMass[3]);
NEWTON_ASSERT((dgUnsigned64(&body0->m_invWorldInertiaMatrix) & 0x0f) == 0);
const dgMatrix &invInertia0 = body0->m_invWorldInertiaMatrix;
dgBody *const body1 = bodyArray[m1].m_body;
// invMass1 = body1->m_invMass[3];
simd_type invMass1 = simd_set1(body1->m_invMass[3]);
NEWTON_ASSERT((dgUnsigned64(&body1->m_invWorldInertiaMatrix) & 0x0f) == 0);
const dgMatrix &invInertia1 = body1->m_invWorldInertiaMatrix;
for (dgInt32 i = 0; i < count; i++) {
// JMinv[index].m_jacobian_IM0.m_linear = Jt[index].m_jacobian_IM0.m_linear.Scale (invMass0);
// JMinv[index].m_jacobian_IM0.m_angular = invInertia0.UnrotateVector (Jt[index].m_jacobian_IM0.m_angular);
// dgVector tmpDiag (JMinv[index].m_jacobian_IM0.m_linear.CompProduct(Jt[index].m_jacobian_IM0.m_linear));
// tmpDiag += JMinv[index].m_jacobian_IM0.m_angular.CompProduct(Jt[index].m_jacobian_IM0.m_angular);
// dgVector tmpAccel (JMinv[index].m_jacobian_IM0.m_linear.CompProduct(body0->m_accel));
// tmpAccel += JMinv[index].m_jacobian_IM0.m_angular.CompProduct(body0->m_alpha);
((simd_type &)JMinv[index].m_jacobian_IM0.m_linear) =
simd_mul_v((simd_type &)Jt[index].m_jacobian_IM0.m_linear, invMass0);
simd_type tmp0 = (simd_type &)Jt[index].m_jacobian_IM0.m_angular;
simd_type tmp1 =
simd_mul_v((simd_type &)invInertia0.m_front, simd_permut_v(tmp0, tmp0, PURMUT_MASK(3, 0, 0, 0)));
tmp1 =
simd_mul_add_v(tmp1, (simd_type &)invInertia0.m_up, simd_permut_v(tmp0, tmp0, PURMUT_MASK(3, 1, 1, 1)));
((simd_type &)JMinv[index].m_jacobian_IM0.m_angular) =
simd_mul_add_v(tmp1, (simd_type &)invInertia0.m_right, simd_permut_v(tmp0, tmp0, PURMUT_MASK(3, 2, 2, 2)));
simd_type tmpDiag =
simd_mul_v((simd_type &)JMinv[index].m_jacobian_IM0.m_linear, (simd_type &)Jt[index].m_jacobian_IM0.m_linear);
tmpDiag =
simd_mul_add_v(tmpDiag, (simd_type &)JMinv[index].m_jacobian_IM0.m_angular, (simd_type &)Jt[index].m_jacobian_IM0.m_angular);
simd_type tmpAccel =
simd_mul_v((simd_type &)JMinv[index].m_jacobian_IM0.m_linear, (simd_type &)body0->m_accel);
tmpAccel =
simd_mul_add_v(tmpAccel, (simd_type &)JMinv[index].m_jacobian_IM0.m_angular, (simd_type &)body0->m_alpha);
// JMinv[index].m_jacobian_IM1.m_linear = Jt[index].m_jacobian_IM1.m_linear.Scale (invMass1);
// JMinv[index].m_jacobian_IM1.m_angular = invInertia1.UnrotateVector (Jt[index].m_jacobian_IM1.m_angular);
// tmpDiag += JMinv[index].m_jacobian_IM1.m_linear.CompProduct(Jt[index].m_jacobian_IM1.m_linear);
// tmpDiag += JMinv[index].m_jacobian_IM1.m_angular.CompProduct(Jt[index].m_jacobian_IM1.m_angular);
// tmpAccel += JMinv[index].m_jacobian_IM1.m_linear.CompProduct(body1->m_accel);
// tmpAccel += JMinv[index].m_jacobian_IM1.m_angular.CompProduct(body1->m_alpha);
((simd_type &)JMinv[index].m_jacobian_IM1.m_linear) =
simd_mul_v((simd_type &)Jt[index].m_jacobian_IM1.m_linear, invMass1);
tmp0 = (simd_type &)Jt[index].m_jacobian_IM1.m_angular;
tmp1 =
simd_mul_v((simd_type &)invInertia1.m_front, simd_permut_v(tmp0, tmp0, PURMUT_MASK(3, 0, 0, 0)));
tmp1 =
simd_mul_add_v(tmp1, (simd_type &)invInertia1.m_up, simd_permut_v(tmp0, tmp0, PURMUT_MASK(3, 1, 1, 1)));
((simd_type &)JMinv[index].m_jacobian_IM1.m_angular) =
simd_mul_add_v(tmp1, (simd_type &)invInertia1.m_right, simd_permut_v(tmp0, tmp0, PURMUT_MASK(3, 2, 2, 2)));
tmpDiag =
simd_mul_add_v(tmpDiag, (simd_type &)JMinv[index].m_jacobian_IM1.m_linear, (simd_type &)Jt[index].m_jacobian_IM1.m_linear);
tmpDiag =
simd_mul_add_v(tmpDiag, (simd_type &)JMinv[index].m_jacobian_IM1.m_angular, (simd_type &)Jt[index].m_jacobian_IM1.m_angular);
tmpAccel =
simd_mul_add_v(tmpAccel, (simd_type &)JMinv[index].m_jacobian_IM1.m_linear, (simd_type &)body1->m_accel);
tmpAccel =
simd_mul_add_v(tmpAccel, (simd_type &)JMinv[index].m_jacobian_IM1.m_angular, (simd_type &)body1->m_alpha);
// coordenateAccel[index] -= (tmpAccel.m_x + tmpAccel.m_y + tmpAccel.m_z);
//NEWTON_ASSERT(tmpAccel.m128_f32[3] == dgFloat32 (0.0f));
tmpAccel = simd_add_v(tmpAccel, simd_move_hl_v(tmpAccel, tmpAccel));
tmpAccel =
simd_sub_s(zero, simd_add_s(tmpAccel, simd_permut_v(tmpAccel, tmpAccel, PURMUT_MASK(3, 3, 3, 1))));
simd_store_s(tmpAccel, &extAccel[index]);
simd_store_s(simd_add_s(simd_load_s(coordenateAccel[index]), tmpAccel),
&coordenateAccel[index]);
// force[index] = bilateralForceBounds[index].m_jointForce[0];
force[index] = jointForceFeeback[index][0];
NEWTON_ASSERT(diagDamp[index] >= dgFloat32(0.1f));
NEWTON_ASSERT(diagDamp[index] <= dgFloat32(100.0f));
dgFloat32 stiffness = DG_PSD_DAMP_TOL * diagDamp[index];
// diag = (tmpDiag.m_x + tmpDiag.m_y + tmpDiag.m_z);
tmpDiag = simd_add_v(tmpDiag, simd_move_hl_v(tmpDiag, tmpDiag));
dgFloat32 diag;
simd_store_s(
simd_add_s(tmpDiag, simd_permut_v(tmpDiag, tmpDiag, PURMUT_MASK(3, 3, 3, 1))),
&diag);
NEWTON_ASSERT(diag > dgFloat32(0.0f));
diagDamp[index] = diag * stiffness;
diag *= (dgFloat32(1.0f) + stiffness);
invDJMinvJt[index] = dgFloat32(1.0f) / diag;
index++;
}
}
#endif
}
void dgWorldDynamicUpdate::BuildJacobianMatrix(const dgIsland &island,
dgInt32 threadIndex, dgFloat32 timestep) {
NEWTON_ASSERT(island.m_bodyCount >= 2);
//NEWTON_ASSERT (island.m_jointCount >= 1);
dgJacobianMemory &solverMemory = m_solverMemory[threadIndex];
dgInt32 bodyCount = island.m_bodyCount;
dgBodyInfo *const bodyArray = &m_bodyArray[island.m_bodyStart];
NEWTON_ASSERT(
(bodyArray[0].m_body->m_accel % bodyArray[0].m_body->m_accel) == dgFloat32(0.0f));
NEWTON_ASSERT(
(bodyArray[0].m_body->m_alpha % bodyArray[0].m_body->m_alpha) == dgFloat32(0.0f));
for (dgInt32 i = 1; i < bodyCount; i++) {
dgBody *const body = bodyArray[i].m_body;
NEWTON_ASSERT(body->m_invMass.m_w > dgFloat32(0.0f));
body->AddDamingAcceleration();
body->CalcInvInertiaMatrix();
}
while (bodyCount >= solverMemory.m_maxBodiesCount) {
m_world->dgGetUserLock();
ReallocIntenalForcesMemory(bodyCount, threadIndex);
m_world->dgReleasedUserLock();
}
dgInt32 jointCount = island.m_jointCount;
dgJointInfo *const constraintArray = &m_constraintArray[island.m_jointStart];
solverMemory.m_constraintArray = constraintArray;
dgInt32 maxRowCount = 0;
for (dgInt32 j = 0; j < jointCount; j++) {
dgConstraint *const constraint = constraintArray[j].m_joint;
;
dgInt32 dof =
dgInt32(
(constraint->m_maxDOF & (DG_SIMD_WORD_SIZE - 1)) ? ((constraint->m_maxDOF & (-DG_SIMD_WORD_SIZE)) + DG_SIMD_WORD_SIZE) : constraint->m_maxDOF);
maxRowCount += dof;
}
while (maxRowCount > solverMemory.m_maxJacobiansCount) {
m_world->dgGetUserLock();
ReallocJacobiansMemory(solverMemory.m_maxJacobiansCount * 2, threadIndex);
m_world->dgReleasedUserLock();
}
dgInt32 rowCount = 0;
if (island.m_hasUnilateralJoints) {
rowCount = GetJacobialDerivatives(island, threadIndex, false, rowCount,
timestep);
}
rowCount = GetJacobialDerivatives(island, threadIndex, true, rowCount,
timestep);
solverMemory.m_rowCount = rowCount;
solverMemory.m_bodyCount = bodyCount;
solverMemory.m_bodyArray = bodyArray;
solverMemory.m_jointCount = jointCount;
solverMemory.m_timeStep = timestep;
solverMemory.m_invTimeStep = dgFloat32(1.0f) / solverMemory.m_timeStep;
dgFloat32 *const force = solverMemory.m_force;
const dgJacobianPair *const Jt = solverMemory.m_Jt;
dgJacobianPair *const JMinv = solverMemory.m_JMinv;
dgFloat32 *const diagDamp = solverMemory.m_diagDamp;
dgFloat32 *const extAccel = solverMemory.m_deltaAccel;
dgFloat32 *const invDJMinvJt = solverMemory.m_invDJMinvJt;
dgFloat32 *const coordenateAccel = solverMemory.m_coordenateAccel;
dgFloat32 **const jointForceFeeback = solverMemory.m_jointFeebackForce;
// dgInt32* const accelIsMotor = solverMemory.m_accelIsMotor;
for (dgInt32 k = 0; k < jointCount; k++) {
dgInt32 index = constraintArray[k].m_autoPairstart;
dgInt32 count = constraintArray[k].m_autoPaircount;
dgInt32 m0 = constraintArray[k].m_m0;
dgInt32 m1 = constraintArray[k].m_m1;
NEWTON_ASSERT(m0 >= 0);
NEWTON_ASSERT(m0 < bodyCount);
dgBody *const body0 = bodyArray[m0].m_body;
dgFloat32 invMass0 = body0->m_invMass[3];
const dgMatrix &invInertia0 = body0->m_invWorldInertiaMatrix;
NEWTON_ASSERT(m1 >= 0);
NEWTON_ASSERT(m1 < bodyCount);
dgBody *const body1 = bodyArray[m1].m_body;
dgFloat32 invMass1 = body1->m_invMass[3];
const dgMatrix &invInertia1 = body1->m_invWorldInertiaMatrix;
for (dgInt32 i = 0; i < count; i++) {
// dgFloat32 extenalAcceleration;
JMinv[index].m_jacobian_IM0.m_linear =
Jt[index].m_jacobian_IM0.m_linear.Scale(invMass0);
JMinv[index].m_jacobian_IM0.m_angular = invInertia0.UnrotateVector(
Jt[index].m_jacobian_IM0.m_angular);
dgVector tmpDiag(
JMinv[index].m_jacobian_IM0.m_linear.CompProduct(
Jt[index].m_jacobian_IM0.m_linear));
tmpDiag += JMinv[index].m_jacobian_IM0.m_angular.CompProduct(
Jt[index].m_jacobian_IM0.m_angular);
dgVector tmpAccel(
JMinv[index].m_jacobian_IM0.m_linear.CompProduct(body0->m_accel));
tmpAccel += JMinv[index].m_jacobian_IM0.m_angular.CompProduct(
body0->m_alpha);
JMinv[index].m_jacobian_IM1.m_linear =
Jt[index].m_jacobian_IM1.m_linear.Scale(invMass1);
JMinv[index].m_jacobian_IM1.m_angular = invInertia1.UnrotateVector(
Jt[index].m_jacobian_IM1.m_angular);
tmpDiag += JMinv[index].m_jacobian_IM1.m_linear.CompProduct(
Jt[index].m_jacobian_IM1.m_linear);
tmpDiag += JMinv[index].m_jacobian_IM1.m_angular.CompProduct(
Jt[index].m_jacobian_IM1.m_angular);
tmpAccel += JMinv[index].m_jacobian_IM1.m_linear.CompProduct(
body1->m_accel);
tmpAccel += JMinv[index].m_jacobian_IM1.m_angular.CompProduct(
body1->m_alpha);
dgFloat32 extenalAcceleration = -(tmpAccel.m_x + tmpAccel.m_y + tmpAccel.m_z);
extAccel[index] = extenalAcceleration;
coordenateAccel[index] += extenalAcceleration;
force[index] = jointForceFeeback[index][0];
// force[index] = 0.0f;
NEWTON_ASSERT(diagDamp[index] >= dgFloat32(0.1f));
NEWTON_ASSERT(diagDamp[index] <= dgFloat32(100.0f));
dgFloat32 stiffness = DG_PSD_DAMP_TOL * diagDamp[index];
dgFloat32 diag = (tmpDiag.m_x + tmpDiag.m_y + tmpDiag.m_z);
NEWTON_ASSERT(diag > dgFloat32(0.0f));
diagDamp[index] = diag * stiffness;
diag *= (dgFloat32(1.0f) + stiffness);
// solverMemory.m_diagJMinvJt[index] = diag;
invDJMinvJt[index] = dgFloat32(1.0f) / diag;
index++;
}
}
}
void dgJacobianMemory::CalculateReactionsForcesSimd(dgInt32 solverMode,
dgFloat32 tolerance) const {
#ifdef DG_BUILD_SIMD_CODE
if (m_jointCount == 0) {
// ApplyExternalForcesAndAccelerationSimd (tolerance);
ApplyExternalForcesAndAccelerationSimd(0.0f);
return;
}
if (m_jointCount == 1) {
CalculateSimpleBodyReactionsForcesSimd(tolerance);
ApplyExternalForcesAndAccelerationSimd(tolerance * 0.001f);
return;
}
if (solverMode) {
CalculateForcesGameModeSimd(solverMode, tolerance);
} else {
CalculateForcesSimulationModeSimd(tolerance);
}
#endif
}
void dgJacobianMemory::CalculateReactionsForces(dgInt32 solverMode,
dgFloat32 tolerance) const {
if (m_jointCount == 0) {
// ApplyExternalForcesAndAcceleration (tolerance);
ApplyExternalForcesAndAcceleration(0.0f);
return;
}
if (m_jointCount == 1) {
CalculateSimpleBodyReactionsForces(tolerance);
ApplyExternalForcesAndAcceleration(tolerance * 0.001f);
return;
}
if (solverMode) {
CalculateForcesGameMode(solverMode, tolerance);
} else {
CalculateForcesSimulationMode(tolerance);
}
}
void dgJacobianMemory::ApplyExternalForcesAndAccelerationSimd(
dgFloat32 tolerance) const {
#ifdef DG_BUILD_SIMD_CODE
// dgInt32 hasJointFeeback;
// simd_type zero;
// simd_type absMask;
// simd_type timeStep;
// simd_type toleranceSimd;
dgFloat32 *const force = m_force;
dgJacobian *const internalForces = m_internalForces;
const dgJacobianPair *const Jt = m_Jt;
const dgBodyInfo *const bodyArray = m_bodyArray;
const dgJointInfo *const constraintArray = m_constraintArray;
dgFloat32 **const jointForceFeeback = m_jointFeebackForce;
simd_type zero = simd_set1(dgFloat32(0.0f));
for (dgInt32 i = 0; i < m_bodyCount; i++) {
(simd_type &)internalForces[i].m_linear = zero;
(simd_type &)internalForces[i].m_angular = zero;
}
// dgInt32 tmp = 0x7fffffff;
dgInt32 hasJointFeeback = 0;
// simd_type absMask = simd_set1(*((dgFloat32*)&tmp));
simd_type timeStep = simd_set1(m_timeStep);
simd_type toleranceSimd = simd_set1(tolerance);
toleranceSimd = simd_mul_s(toleranceSimd, toleranceSimd);
for (dgInt32 i = 0; i < m_jointCount; i++) {
// dgInt32 m0;
// dgInt32 m1;
// dgInt32 first;
// dgInt32 count;
// dgInt32 index;
// simd_type y0_linear;
// simd_type y0_angular;
// simd_type y1_linear;
// simd_type y1_angular;
dgInt32 first = constraintArray[i].m_autoPairstart;
dgInt32 count = constraintArray[i].m_autoPaircount;
dgInt32 m0 = constraintArray[i].m_m0;
dgInt32 m1 = constraintArray[i].m_m1;
simd_type y0_linear = zero;
simd_type y0_angular = zero;
simd_type y1_linear = zero;
simd_type y1_angular = zero;
for (dgInt32 j = 0; j < count; j++) {
dgInt32 index = j + first;
// val = force[index];
//NEWTON_ASSERT (dgCheckFloat(val));
simd_type val = simd_set1(force[index]);
simd_store_s(val, &jointForceFeeback[index][0]);
// maxForce = simd_max_s (maxForce, simd_and_v (val, absMask));
y0_linear =
simd_mul_add_v(y0_linear, (simd_type &)Jt[index].m_jacobian_IM0.m_linear, val);
y0_angular =
simd_mul_add_v(y0_angular, (simd_type &)Jt[index].m_jacobian_IM0.m_angular, val);
y1_linear =
simd_mul_add_v(y1_linear, (simd_type &)Jt[index].m_jacobian_IM1.m_linear, val);
y1_angular =
simd_mul_add_v(y1_angular, (simd_type &)Jt[index].m_jacobian_IM1.m_angular, val);
}
// if (constraintArray[i].m_joint->GetId() == dgContactConstraintId) {
// dgFloat32 force;
// simd_store_s (maxForce, &force);
// m_world->AddToBreakQueue ((dgContact*)constraintArray[i].m_joint, force);
// }
hasJointFeeback |= (constraintArray[i].m_joint->m_updaFeedbackCallback ? 1 : 0);
// if (constraintArray[i].m_joint->m_updaFeedbackCallback) {
// constraintArray[i].m_joint->m_updaFeedbackCallback (*constraintArray[i].m_joint, m_timeStep, m_threadIndex);
// }
(simd_type &)internalForces[m0].m_linear =
simd_add_v((simd_type &)internalForces[m0].m_linear, y0_linear);
(simd_type &)internalForces[m0].m_angular =
simd_add_v((simd_type &)internalForces[m0].m_angular, y0_angular);
(simd_type &)internalForces[m1].m_linear =
simd_add_v((simd_type &)internalForces[m1].m_linear, y1_linear);
(simd_type &)internalForces[m1].m_angular =
simd_add_v((simd_type &)internalForces[m1].m_angular, y1_angular);
}
for (dgInt32 i = 1; i < m_bodyCount; i++) {
// simd_type tmp;
// simd_type accel;
// simd_type alpha;
// dgBody* body;
dgBody *const body = bodyArray[i].m_body;
(simd_type &)body->m_accel =
simd_add_v((simd_type &)internalForces[i].m_linear, (simd_type &)body->m_accel);
(simd_type &)body->m_alpha =
simd_add_v((simd_type &)internalForces[i].m_angular, (simd_type &)body->m_alpha);
// dgVector accel (body->m_accel.Scale (body->m_invMass.m_w));
simd_type accel =
simd_mul_v((simd_type &)body->m_accel, simd_set1(body->m_invMass.m_w));
// dgVector alpha (body->m_invWorldInertiaMatrix.RotateVector (body->m_alpha));
simd_type alpha =
simd_mul_add_v(simd_mul_add_v(simd_mul_v((simd_type &)body->m_invWorldInertiaMatrix[0], simd_set1(body->m_alpha.m_x)),
(simd_type &)body->m_invWorldInertiaMatrix[1], simd_set1(body->m_alpha.m_y)),
(simd_type &)body->m_invWorldInertiaMatrix[2], simd_set1(body->m_alpha.m_z));
// dgVector accel (body->m_accel.Scale (body->m_invMass.m_w));
// dgVector alpha (body->m_invWorldInertiaMatrix.RotateVector (body->m_alpha));
// error = accel % accel;
// if (error < accelTol2) {
// accel = zero;
// body->m_accel = zero;
// }
simd_type tmp = simd_mul_v(accel, accel);
tmp = simd_add_v(tmp, simd_move_hl_v(tmp, tmp));
tmp = simd_add_s(tmp, simd_permut_v(tmp, tmp, PURMUT_MASK(0, 0, 0, 1)));
tmp = simd_cmplt_s(tmp, toleranceSimd);
tmp = simd_permut_v(tmp, tmp, PURMUT_MASK(0, 0, 0, 0));
accel = simd_andnot_v(accel, tmp);
(simd_type &)body->m_accel = simd_andnot_v((simd_type &)body->m_accel, tmp);
// error = alpha % alpha;
// if (error < accelTol2) {
// alpha = zero;
// body->m_alpha = zero;
// }
tmp = simd_mul_v(alpha, alpha);
tmp = simd_add_v(tmp, simd_move_hl_v(tmp, tmp));
tmp = simd_add_s(tmp, simd_permut_v(tmp, tmp, PURMUT_MASK(0, 0, 0, 1)));
tmp = simd_cmplt_s(tmp, toleranceSimd);
tmp = simd_permut_v(tmp, tmp, PURMUT_MASK(0, 0, 0, 0));
alpha = simd_andnot_v(alpha, tmp);
(simd_type &)body->m_alpha = simd_andnot_v((simd_type &)body->m_alpha, tmp);
(simd_type &)body->m_netForce = (simd_type &)body->m_accel;
(simd_type &)body->m_netTorque = (simd_type &)body->m_alpha;
(simd_type &)body->m_veloc =
simd_mul_add_v((simd_type &)body->m_veloc, accel, timeStep);
(simd_type &)body->m_omega =
simd_mul_add_v((simd_type &)body->m_omega, alpha, timeStep);
}
if (hasJointFeeback) {
for (dgInt32 i = 0; i < m_jointCount; i++) {
if (constraintArray[i].m_joint->m_updaFeedbackCallback) {
constraintArray[i].m_joint->m_updaFeedbackCallback(
*constraintArray[i].m_joint, m_timeStep, m_threadIndex);
}
}
}
#endif
}
void dgJacobianMemory::ApplyExternalForcesAndAcceleration(
dgFloat32 tolerance) const {
// dgInt32 hasJointFeeback;
// dgFloat32 accelTol2;
dgFloat32 *const force = m_force;
const dgJacobianPair *const Jt = m_Jt;
const dgBodyInfo *const bodyArray = m_bodyArray;
const dgJointInfo *const constraintArray = m_constraintArray;
dgFloat32 **const jointForceFeeback = m_jointFeebackForce;
dgJacobian *const internalForces = m_internalForces;
dgVector zero(dgFloat32(0.0f), dgFloat32(0.0f), dgFloat32(0.0f),
dgFloat32(0.0f));
for (dgInt32 i = 0; i < m_bodyCount; i++) {
internalForces[i].m_linear = zero;
internalForces[i].m_angular = zero;
}
dgInt32 hasJointFeeback = 0;
dgFloat32 accelTol2 = tolerance * tolerance;
for (dgInt32 i = 0; i < m_jointCount; i++) {
// dgInt32 m0;
// dgInt32 m1;
// dgInt32 first;
// dgInt32 count;
// dgInt32 index;
// dgFloat32 val;
dgInt32 first = constraintArray[i].m_autoPairstart;
dgInt32 count = constraintArray[i].m_autoPaircount;
dgInt32 m0 = constraintArray[i].m_m0;
dgInt32 m1 = constraintArray[i].m_m1;
dgJacobian y0;
dgJacobian y1;
y0.m_linear = zero;
y0.m_angular = zero;
y1.m_linear = zero;
y1.m_angular = zero;
for (dgInt32 j = 0; j < count; j++) {
dgInt32 index = j + first;
dgFloat32 val = force[index];
NEWTON_ASSERT(dgCheckFloat(val));
jointForceFeeback[index][0] = val;
y0.m_linear += Jt[index].m_jacobian_IM0.m_linear.Scale(val);
y0.m_angular += Jt[index].m_jacobian_IM0.m_angular.Scale(val);
y1.m_linear += Jt[index].m_jacobian_IM1.m_linear.Scale(val);
y1.m_angular += Jt[index].m_jacobian_IM1.m_angular.Scale(val);
}
// if (constraintArray[i].m_joint->GetId() == dgContactConstraintId) {
// m_world->AddToBreakQueue ((dgContact*)constraintArray[i].m_joint, maxForce);
// }
// hasJointFeeback |= dgUnsigned32 (constraintArray[i].m_joint->m_updaFeedbackCallback);
hasJointFeeback |= (constraintArray[i].m_joint->m_updaFeedbackCallback ? 1 : 0);
internalForces[m0].m_linear += y0.m_linear;
internalForces[m0].m_angular += y0.m_angular;
internalForces[m1].m_linear += y1.m_linear;
internalForces[m1].m_angular += y1.m_angular;
}
for (dgInt32 i = 1; i < m_bodyCount; i++) {
dgBody *const body = bodyArray[i].m_body;
body->m_accel += internalForces[i].m_linear;
body->m_alpha += internalForces[i].m_angular;
dgVector accel(body->m_accel.Scale(body->m_invMass.m_w));
dgVector alpha(body->m_invWorldInertiaMatrix.RotateVector(body->m_alpha));
dgFloat32 error = accel % accel;
if (error < accelTol2) {
accel = zero;
body->m_accel = zero;
}
error = alpha % alpha;
if (error < accelTol2) {
alpha = zero;
body->m_alpha = zero;
}
body->m_netForce = body->m_accel;
body->m_netTorque = body->m_alpha;
body->m_veloc += accel.Scale(m_timeStep);
body->m_omega += alpha.Scale(m_timeStep);
}
if (hasJointFeeback) {
for (dgInt32 i = 0; i < m_jointCount; i++) {
if (constraintArray[i].m_joint->m_updaFeedbackCallback) {
constraintArray[i].m_joint->m_updaFeedbackCallback(
reinterpret_cast<const NewtonJoint *>(constraintArray[i].m_joint), m_timeStep, m_threadIndex);
}
}
}
}
void dgJacobianMemory::SwapRowsSimd(dgInt32 i, dgInt32 j) const {
#ifdef DG_BUILD_SIMD_CODE
// simd_type tmp0;
// simd_type tmp1;
// simd_type tmp2;
// simd_type tmp3;
// simd_type tmp4;
// simd_type tmp5;
// simd_type tmp6;
// simd_type tmp7;
NEWTON_ASSERT(i != j);
// Swap (m_Jt[i], m_Jt[j]);
dgJacobianPair *const ptr0 = m_Jt;
simd_type tmp0 = (simd_type &)ptr0[i].m_jacobian_IM0.m_linear;
simd_type tmp1 = (simd_type &)ptr0[i].m_jacobian_IM0.m_angular;
simd_type tmp2 = (simd_type &)ptr0[i].m_jacobian_IM1.m_linear;
simd_type tmp3 = (simd_type &)ptr0[i].m_jacobian_IM1.m_angular;
simd_type tmp4 = (simd_type &)ptr0[j].m_jacobian_IM0.m_linear;
simd_type tmp5 = (simd_type &)ptr0[j].m_jacobian_IM0.m_angular;
simd_type tmp6 = (simd_type &)ptr0[j].m_jacobian_IM1.m_linear;
simd_type tmp7 = (simd_type &)ptr0[j].m_jacobian_IM1.m_angular;
(simd_type &)ptr0[j].m_jacobian_IM0.m_linear = tmp0;
(simd_type &)ptr0[j].m_jacobian_IM0.m_angular = tmp1;
(simd_type &)ptr0[j].m_jacobian_IM1.m_linear = tmp2;
(simd_type &)ptr0[j].m_jacobian_IM1.m_angular = tmp3;
(simd_type &)ptr0[i].m_jacobian_IM0.m_linear = tmp4;
(simd_type &)ptr0[i].m_jacobian_IM0.m_angular = tmp5;
(simd_type &)ptr0[i].m_jacobian_IM1.m_linear = tmp6;
(simd_type &)ptr0[i].m_jacobian_IM1.m_angular = tmp7;
// Swap (m_JMinv[i], m_JMinv[j]);
dgJacobianPair *const ptr1 = m_JMinv;
tmp0 = (simd_type &)ptr1[i].m_jacobian_IM0.m_linear;
tmp1 = (simd_type &)ptr1[i].m_jacobian_IM0.m_angular;
tmp2 = (simd_type &)ptr1[i].m_jacobian_IM1.m_linear;
tmp3 = (simd_type &)ptr1[i].m_jacobian_IM1.m_angular;
tmp4 = (simd_type &)ptr1[j].m_jacobian_IM0.m_linear;
tmp5 = (simd_type &)ptr1[j].m_jacobian_IM0.m_angular;
tmp6 = (simd_type &)ptr1[j].m_jacobian_IM1.m_linear;
tmp7 = (simd_type &)ptr1[j].m_jacobian_IM1.m_angular;
(simd_type &)ptr1[j].m_jacobian_IM0.m_linear = tmp0;
(simd_type &)ptr1[j].m_jacobian_IM0.m_angular = tmp1;
(simd_type &)ptr1[j].m_jacobian_IM1.m_linear = tmp2;
(simd_type &)ptr1[j].m_jacobian_IM1.m_angular = tmp3;
(simd_type &)ptr1[i].m_jacobian_IM0.m_linear = tmp4;
(simd_type &)ptr1[i].m_jacobian_IM0.m_angular = tmp5;
(simd_type &)ptr1[i].m_jacobian_IM1.m_linear = tmp6;
(simd_type &)ptr1[i].m_jacobian_IM1.m_angular = tmp7;
Swap(m_diagDamp[i], m_diagDamp[j]);
Swap(m_invDJMinvJt[i], m_invDJMinvJt[j]);
// Swap (m_jacobianIndexArray[i], m_jacobianIndexArray[j]);
Swap(m_normalForceIndex[i], m_normalForceIndex[j]);
// Swap (m_lowerBoundFrictionForce[i], m_lowerBoundFrictionForce[j]);
// Swap (m_upperBoundFrictionForce[i], m_upperBoundFrictionForce[j]);
Swap(m_lowerBoundFrictionCoefficent[i], m_lowerBoundFrictionCoefficent[j]);
Swap(m_upperBoundFrictionCoefficent[i], m_upperBoundFrictionCoefficent[j]);
Swap(m_jointFeebackForce[i], m_jointFeebackForce[j]);
Swap(m_coordenateAccel[i], m_coordenateAccel[j]);
Swap(m_force[i], m_force[j]);
Swap(m_accel[i], m_accel[j]);
// Swap (m_forceStep[i], m_forceStep[j]);
Swap(m_deltaAccel[i], m_deltaAccel[j]);
Swap(m_deltaForce[i], m_deltaForce[j]);
#else
#endif
}
void dgJacobianMemory::SwapRows(dgInt32 i, dgInt32 j) const {
NEWTON_ASSERT(i != j);
#define SwapMacro(a, b) Swap(a, b)
SwapMacro(m_Jt[i], m_Jt[j]);
SwapMacro(m_JMinv[i], m_JMinv[j]);
SwapMacro(m_diagDamp[i], m_diagDamp[j]);
SwapMacro(m_invDJMinvJt[i], m_invDJMinvJt[j]);
// SwapMacro (m_diagJMinvJt[i], m_diagJMinvJt[j]);
// SwapMacro (m_frictionThreshold[i], m_frictionThreshold[j]);
// SwapMacro (m_jacobianIndexArray[i], m_jacobianIndexArray[j]);
// SwapMacro (m_bilateralForceBounds[i], m_bilateralForceBounds[j]);
SwapMacro(m_normalForceIndex[i], m_normalForceIndex[j]);
// SwapMacro (m_lowerBoundFrictionForce[i], m_lowerBoundFrictionForce[j]);
// SwapMacro (m_upperBoundFrictionForce[i], m_upperBoundFrictionForce[j]);
SwapMacro(m_lowerBoundFrictionCoefficent[i],
m_lowerBoundFrictionCoefficent[j]);
SwapMacro(m_upperBoundFrictionCoefficent[i],
m_upperBoundFrictionCoefficent[j]);
SwapMacro(m_jointFeebackForce[i], m_jointFeebackForce[j]);
SwapMacro(m_coordenateAccel[i], m_coordenateAccel[j]);
SwapMacro(m_force[i], m_force[j]);
SwapMacro(m_accel[i], m_accel[j]);
// SwapMacro (m_forceStep[i], m_forceStep[j]);
SwapMacro(m_deltaAccel[i], m_deltaAccel[j]);
SwapMacro(m_deltaForce[i], m_deltaForce[j]);
}
void dgJacobianMemory::CalculateSimpleBodyReactionsForcesSimd(
dgFloat32 maxAccNorm) const {
#ifdef DG_BUILD_SIMD_CODE
simd_type accelPtr[DG_CONSTRAINT_MAX_ROWS / DG_SIMD_WORD_SIZE];
simd_type activeRowPtr[DG_CONSTRAINT_MAX_ROWS / DG_SIMD_WORD_SIZE];
simd_type lowBoundPtr[DG_CONSTRAINT_MAX_ROWS / DG_SIMD_WORD_SIZE];
simd_type highBoundPtr[DG_CONSTRAINT_MAX_ROWS / DG_SIMD_WORD_SIZE];
simd_type deltaAccelPtr[DG_CONSTRAINT_MAX_ROWS / DG_SIMD_WORD_SIZE];
simd_type deltaForcePtr[DG_CONSTRAINT_MAX_ROWS / DG_SIMD_WORD_SIZE];
dgFloat32 *const force = m_force;
dgFloat32 *const activeRow = (dgFloat32 *)activeRowPtr;
dgFloat32 *const lowBound = (dgFloat32 *)lowBoundPtr;
dgFloat32 *const highBound = (dgFloat32 *)highBoundPtr;
dgFloat32 *const accel = (dgFloat32 *)accelPtr;
dgFloat32 *const deltaAccel = (dgFloat32 *)deltaAccelPtr;
dgFloat32 *const deltaForce = (dgFloat32 *)deltaForcePtr;
const dgJacobianPair *const Jt = m_Jt;
const dgJacobianPair *const JMinv = m_JMinv;
const dgFloat32 *const diagDamp = m_diagDamp;
const dgFloat32 *const invDJMinvJt = m_invDJMinvJt;
const dgInt32 *const normalForceIndex = m_normalForceIndex;
const dgFloat32 *const coordenateAccel = m_coordenateAccel;
const dgJointInfo *const constraintArray = m_constraintArray;
const dgFloat32 *const lowerFriction = m_lowerBoundFrictionCoefficent;
const dgFloat32 *const upperFriction = m_upperBoundFrictionCoefficent;
dgFloatSign tmpIndex;
tmpIndex.m_integer.m_iVal = 0x7fffffff;
simd_type signMask = simd_set1(tmpIndex.m_fVal);
simd_type one = simd_set1(dgFloat32(1.0f));
simd_type zero = simd_set1(dgFloat32(0.0f));
simd_type tol_pos_1eNeg5 = simd_set1(dgFloat32(1.0e-5f));
simd_type tol_pos_1eNeg8 = simd_set1(dgFloat32(1.0e-8f));
simd_type tol_pos_1eNeg16 = simd_set1(dgFloat32(1.0e-16f));
simd_type tol_neg_1eNeg16 = simd_set1(dgFloat32(-1.0e-16f));
// dgVector zero (dgFloat32 (0.0f), dgFloat32 (0.0f), dgFloat32 (0.0f), dgFloat32 (0.0f));
dgInt32 count = constraintArray[0].m_autoPaircount;
NEWTON_ASSERT(constraintArray[0].m_autoPairstart == 0);
dgInt32 roundCount = count & (-DG_SIMD_WORD_SIZE);
if (roundCount != count) {
roundCount += 4;
for (dgInt32 i = count; i < roundCount; i++) {
// force[i] = dgFloat32 (0.0f);
// accel[i] = dgFloat32 (0.0f);
// activeRow[i] = dgFloat32 (0.0f);
// deltaAccel[i] = dgFloat32 (0.0f);
// deltaForce[i] = dgFloat32 (0.0f);
m_normalForceIndex[i] = -1;
simd_store_s(zero, &force[i]);
simd_store_s(zero, &accel[i]);
simd_store_s(zero, &activeRow[i]);
simd_store_s(zero, &deltaAccel[i]);
simd_store_s(zero, &deltaForce[i]);
simd_store_s(one, &m_lowerBoundFrictionCoefficent[i]);
simd_store_s(zero, &m_upperBoundFrictionCoefficent[i]);
}
}
// j0 = jacobianIndexArray[0].m_m0;
// j1 = jacobianIndexArray[0].m_m1;
// dgInt32 m0 = constraintArray[0].m_m0;
// dgInt32 m1 = constraintArray[0].m_m1;
// dgBody* const body0 = bodyArray[m0].m_body;
// dgBody* const body1 = bodyArray[m1].m_body;
// ak = dgSqrt((body1->m_accel % body1->m_accel) + (body0->m_accel % body0->m_accel));
// tmp3 = simd_mul_add_v (simd_mul_v ((simd_type&)body1->m_accel, (simd_type&)body1->m_accel),
// (simd_type&)body0->m_accel, (simd_type&)body0->m_accel);
// tmp3 = simd_add_v (tmp3, simd_permut_v (tmp3, tmp3, PURMUT_MASK (0, 0, 3, 2)));
// tmp3 = simd_add_s (tmp3, simd_permut_v (tmp3, tmp3, PURMUT_MASK (0, 0, 0, 1)));
// tmp3 = simd_mul_s (tmp3, simd_rsqrt_s(tmp3));
// maxPasses = 0;
simd_type tmp2 = zero;
// tmp3 = simd_permut_v (tmp3, tmp3, PURMUT_MASK (0, 0, 0, 0));
// for (i = 0; i <roundCount; i ++) {
for (dgInt32 i = 0; i < roundCount; i += DG_SIMD_WORD_SIZE) {
// simd_type tmp5;
// simd_type tmp6;
// simd_type tmp7;
// bool test;
// bool lowBoundTest;
// bool highBoundTest;
// dgFloat32 normalForce;
// simd_type lowBoundTest;
// simd_type highBoundTest;
// k = normalForceIndex[i];
// if (k >= 0) {
// val = force[k];
// if (val < dgFloat32 (1.0e-2f)) {
// val = ak;
// }
// } else {
// val = dgFloat32 (1.0f);
// }
// normalForce = force[normalForceIndex[i]] ;
// normalForce = (normalForce > dgFloat32 (1.0e-2f)) ? normalForce : ak;
simd_type normalForce =
simd_move_lh_v(simd_pack_lo_v(simd_load_s(force[normalForceIndex[i + 0]]),
simd_load_s(force[normalForceIndex[i + 1]])),
simd_pack_lo_v(simd_load_s(force[normalForceIndex[i + 2]]),
simd_load_s(force[normalForceIndex[i + 3]])));
// test = simd_cmplt_v (normalForce, tol_pos_1eNeg2);
// normalForce = simd_or_v (simd_and_v (tmp3, test), simd_andnot_v (normalForce, test));
// lowBound[i] = normalForce * lowerFriction[i];
// highBound[i] = normalForce * upperFriction[i];
(simd_type &)lowBound[i] =
simd_mul_v(normalForce, (simd_type &)lowerFriction[i]);
(simd_type &)highBound[i] =
simd_mul_v(normalForce, (simd_type &)upperFriction[i]);
// activeRow[i] = dgFloat32 (1.0f);
// if (force[i] < lowBound[i]) {
// maxPasses --;
// force[i] = lowBound[i];
// activeRow[i] = dgFloat32 (0.0f);
// } else if (force[i] > highBound[i]) {
// maxPasses --;
// force[i] = highBound[i];
// activeRow[i] = dgFloat32 (0.0f);
// }
// lowBoundTest = force[i] < lowBound[i];
// highBoundTest = force[i] > highBound[i];
// lowBoundTest = simd_cmplt_v ((simd_type&)force[i], (simd_type&)lowBound[i]);
// highBoundTest = simd_cmpgt_v ((simd_type&)force[i], (simd_type&)highBound[i]);
// test = lowBoundTest | highBoundTest;
// test = simd_or_v (lowBoundTest, highBoundTest);
simd_type test =
simd_or_v(simd_cmplt_v((simd_type &)force[i], (simd_type &)lowBound[i]), simd_cmpgt_v((simd_type &)force[i], (simd_type &)highBound[i]));
// activeRow[i] = test ? dgFloat32 (0.0f) : dgFloat32 (1.0f);
(simd_type &)activeRow[i] = simd_andnot_v(one, test);
// maxPasses += test ? 0 : 1;
tmp2 = simd_add_v(tmp2, (simd_type &)activeRow[i]);
// force[i] = lowBoundTest ? lowBound[i] : (highBoundTest ? highBound[i] : force[i]);
// normalForce = simd_or_v (simd_and_v ((simd_type&)lowBound[i], lowBoundTest), simd_and_v ((simd_type&)highBound[i], highBoundTest));
// (simd_type&)force[i] = simd_or_v (simd_and_v (normalForce, test), simd_andnot_v ((simd_type&)force[i], test));
(simd_type &)force[i] =
simd_min_v((simd_type &)highBound[i], simd_max_v((simd_type &)force[i], (simd_type &)lowBound[i]));
}
tmp2 = simd_add_v(tmp2, simd_move_hl_v(tmp2, tmp2));
dgInt32 maxPasses =
simd_store_is(simd_add_s(tmp2, simd_permut_v(tmp2, tmp2, PURMUT_MASK(0, 0, 0, 1))));
// y0.m_linear = dgVector (dgFloat32 (0.0f), dgFloat32 (0.0f), dgFloat32 (0.0f), dgFloat32 (0.0f));
// y0.m_angular = dgVector (dgFloat32 (0.0f), dgFloat32 (0.0f), dgFloat32 (0.0f), dgFloat32 (0.0f));
// y1.m_linear = dgVector (dgFloat32 (0.0f), dgFloat32 (0.0f), dgFloat32 (0.0f), dgFloat32 (0.0f));
// y1.m_angular = dgVector (dgFloat32 (0.0f), dgFloat32 (0.0f), dgFloat32 (0.0f), dgFloat32 (0.0f));
simd_type y0_linear = zero;
simd_type y0_angular = zero;
simd_type y1_linear = zero;
simd_type y1_angular = zero;
for (dgInt32 i = 0; i < count; i++) {
// force = m_force[i];
simd_type tmp1 = simd_set1(force[i]);
// y0.m_linear += m_Jt[i].m_jacobian_IM0.m_linear.Scale (force);
// y0.m_angular += m_Jt[i].m_jacobian_IM0.m_angular.Scale (force);
// y1.m_linear += m_Jt[i].m_jacobian_IM1.m_linear.Scale (force);
// y1.m_angular += m_Jt[i].m_jacobian_IM1.m_angular.Scale (force);
y0_linear =
simd_mul_add_v(y0_linear, (simd_type &)Jt[i].m_jacobian_IM0.m_linear, tmp1);
y0_angular =
simd_mul_add_v(y0_angular, (simd_type &)Jt[i].m_jacobian_IM0.m_angular, tmp1);
y1_linear =
simd_mul_add_v(y1_linear, (simd_type &)Jt[i].m_jacobian_IM1.m_linear, tmp1);
y1_angular =
simd_mul_add_v(y1_angular, (simd_type &)Jt[i].m_jacobian_IM1.m_angular, tmp1);
}
// akNum = dgFloat32 (0.0f);
// accNorm = dgFloat32(0.0f);
simd_type tmp0 = zero;
simd_type tmp1 = zero;
for (dgInt32 i = 0; i < count; i++) {
// dgVector acc (m_JMinv[i].m_jacobian_IM0.m_linear.CompProduct(y0.m_linear));
// acc += m_JMinv[i].m_jacobian_IM0.m_angular.CompProduct (y0.m_angular);
// acc += m_JMinv[i].m_jacobian_IM1.m_linear.CompProduct (y1.m_linear);
// acc += m_JMinv[i].m_jacobian_IM1.m_angular.CompProduct (y1.m_angular);
simd_type tmp2 =
simd_mul_v((simd_type &)JMinv[i].m_jacobian_IM0.m_linear, y0_linear);
tmp2 =
simd_mul_add_v(tmp2, (simd_type &)JMinv[i].m_jacobian_IM0.m_angular, y0_angular);
tmp2 =
simd_mul_add_v(tmp2, (simd_type &)JMinv[i].m_jacobian_IM1.m_linear, y1_linear);
tmp2 =
simd_mul_add_v(tmp2, (simd_type &)JMinv[i].m_jacobian_IM1.m_angular, y1_angular);
// m_accel[i] = m_coordenateAccel[i] - acc.m_x - acc.m_y - acc.m_z - m_force[i] * m_diagDamp[i];
tmp2 = simd_add_v(tmp2, simd_move_hl_v(tmp2, tmp2));
tmp2 = simd_add_s(tmp2, simd_permut_v(tmp2, tmp2, PURMUT_MASK(3, 3, 3, 1)));
tmp2 =
simd_sub_s(simd_load_s(coordenateAccel[i]), simd_mul_add_s(tmp2, simd_load_s(force[i]), simd_load_s(diagDamp[i])));
simd_store_s(tmp2, &accel[i]);
// m_deltaForce[i] = m_accel[i] * m_invDJMinvJt[i] * activeRow[i];
simd_type tmp3 =
simd_mul_s(tmp2, simd_mul_s(simd_load_s(invDJMinvJt[i]), simd_load_s(activeRow[i])));
simd_store_s(tmp3, &deltaForce[i]);
// akNum += m_accel[i] * m_deltaForce[i];
tmp0 = simd_mul_add_v(tmp0, tmp3, tmp2);
// accNorm = GetMax (dgAbsf (m_accel[i] * activeRow[i]), accNorm);
tmp1 =
simd_max_s(tmp1, simd_and_v(simd_mul_v(tmp2, simd_load_s(activeRow[i])), signMask));
}
dgFloat32 akNum;
dgFloat32 accNorm;
simd_store_s(tmp0, &akNum);
simd_store_s(tmp1, &accNorm);
for (dgInt32 i = 0; (i < maxPasses) && (accNorm > maxAccNorm); i++) {
// dgInt32 clampedForceIndex;
// dgFloat32 ak;
// dgFloat32 clampedForceIndexValue;
// MatrixTimeVector (forceRows, m_deltaForce, m_deltaAccel);
// y0.m_linear = dgVector (dgFloat32 (0.0f), dgFloat32 (0.0f), dgFloat32 (0.0f), dgFloat32 (0.0f));
// y0.m_angular = dgVector (dgFloat32 (0.0f), dgFloat32 (0.0f), dgFloat32 (0.0f), dgFloat32 (0.0f));
// y1.m_linear = dgVector (dgFloat32 (0.0f), dgFloat32 (0.0f), dgFloat32 (0.0f), dgFloat32 (0.0f));
// y1.m_angular = dgVector (dgFloat32 (0.0f), dgFloat32 (0.0f), dgFloat32 (0.0f), dgFloat32 (0.0f));
simd_type y0_linear = zero;
simd_type y0_angular = zero;
simd_type y1_linear = zero;
simd_type y1_angular = zero;
for (dgInt32 k = 0; k < count; k++) {
// ak = m_deltaForce[k];
simd_type tmp1 = simd_set1(deltaForce[k]);
// y0.m_linear += m_Jt[k].m_jacobian_IM0.m_linear.Scale (ak);
// y0.m_angular += m_Jt[k].m_jacobian_IM0.m_angular.Scale (ak);
// y1.m_linear += m_Jt[k].m_jacobian_IM1.m_linear.Scale (ak);
// y1.m_angular += m_Jt[k].m_jacobian_IM1.m_angular.Scale (ak);
y0_linear =
simd_mul_add_v(y0_linear, (simd_type &)Jt[k].m_jacobian_IM0.m_linear, tmp1);
y0_angular =
simd_mul_add_v(y0_angular, (simd_type &)Jt[k].m_jacobian_IM0.m_angular, tmp1);
y1_linear =
simd_mul_add_v(y1_linear, (simd_type &)Jt[k].m_jacobian_IM1.m_linear, tmp1);
y1_angular =
simd_mul_add_v(y1_angular, (simd_type &)Jt[k].m_jacobian_IM1.m_angular, tmp1);
}
// akDen = dgFloat32 (0.0f);
simd_type tmp0 = zero;
for (dgInt32 k = 0; k < count; k++) {
// dgVector acc (m_JMinv[k].m_jacobian_IM0.m_linear.CompProduct(y0.m_linear));
// acc += m_JMinv[k].m_jacobian_IM0.m_angular.CompProduct(y0.m_angular);
// acc += m_JMinv[k].m_jacobian_IM1.m_linear.CompProduct(y1.m_linear);
// acc += m_JMinv[k].m_jacobian_IM1.m_angular.CompProduct(y1.m_angular);
simd_type tmp2 =
simd_mul_v((simd_type &)JMinv[k].m_jacobian_IM0.m_linear, y0_linear);
tmp2 =
simd_mul_add_v(tmp2, (simd_type &)JMinv[k].m_jacobian_IM0.m_angular, y0_angular);
tmp2 =
simd_mul_add_v(tmp2, (simd_type &)JMinv[k].m_jacobian_IM1.m_linear, y1_linear);
tmp2 =
simd_mul_add_v(tmp2, (simd_type &)JMinv[k].m_jacobian_IM1.m_angular, y1_angular);
// m_deltaAccel[k] = acc.m_x + acc.m_y + acc.m_z + m_deltaForce[k] * m_diagDamp[k];
simd_type tmp1 = simd_load_s(deltaForce[k]);
tmp2 = simd_add_v(tmp2, simd_move_hl_v(tmp2, tmp2));
tmp2 =
simd_add_s(tmp2, simd_permut_v(tmp2, tmp2, PURMUT_MASK(3, 3, 3, 1)));
tmp2 = simd_mul_add_s(tmp2, tmp1, simd_load_s(diagDamp[k]));
simd_store_s(tmp2, &deltaAccel[k]);
// akDen += m_deltaAccel[k] * m_deltaForce[k];
tmp0 = simd_mul_add_v(tmp0, tmp2, tmp1);
}
//NEWTON_ASSERT (akDen > dgFloat32 (0.0f));
// akDen = GetMax (akDen, dgFloat32(1.0e-16f));
//NEWTON_ASSERT (dgAbsf (akDen) >= dgFloat32(1.0e-16f));
// ak = akNum / akDen;
tmp0 = simd_div_s(simd_load_s(akNum), simd_max_s(tmp0, tol_pos_1eNeg16));
// simd_type min_index;
// simd_type minClampIndex;
// simd_type min_index_step;
// simd_type campedIndexValue;
simd_type campedIndexValue = zero;
simd_type minClampIndex = simd_set1(dgFloat32(-1.0f));
simd_type min_index_step = simd_set1(dgFloat32(4.0f));
tmp0 = simd_permut_v(tmp0, tmp0, PURMUT_MASK(0, 0, 0, 0));
simd_type min_index =
simd_set(dgFloat32(0.0f), dgFloat32(1.0f), dgFloat32(2.0f), dgFloat32(3.0f));
// for (k = 0; k < roundCount; k ++) {
for (dgInt32 k = 0; k < roundCount; k += DG_SIMD_WORD_SIZE) {
// simd_type val;
// simd_type num;
// simd_type den;
// simd_type test;
// simd_type negTest;
// simd_type posTest;
// simd_type negValTest;
// simd_type posValTest;
// simd_type negDeltaForceTest;
// simd_type posDeltaForceTest;
// Make sure AK is not negative
// val = force[k] + ak * deltaForce[k];
simd_type val =
simd_mul_add_v((simd_type &)force[k], tmp0, (simd_type &)deltaForce[k]);
// negValTest = val < lowBound[k];
simd_type negValTest = simd_cmplt_v(val, (simd_type &)lowBound[k]);
// posValTest = val > highBound[k];
simd_type posValTest = simd_cmpgt_v(val, (simd_type &)highBound[k]);
// negDeltaForceTest = deltaForce[k] < dgFloat32 (-1.0e-16f);
simd_type negDeltaForceTest =
simd_cmplt_v((simd_type &)deltaForce[k], tol_neg_1eNeg16);
// posDeltaForceTest = deltaForce[k] > dgFloat32 ( 1.0e-16f);
simd_type posDeltaForceTest =
simd_cmpgt_v((simd_type &)deltaForce[k], tol_pos_1eNeg16);
// negTest = negValTest & negDeltaForceTest;
simd_type negTest = simd_and_v(negValTest, negDeltaForceTest);
// posTest = posValTest & posDeltaForceTest;
simd_type posTest = simd_and_v(posValTest, posDeltaForceTest);
// test = negTest | posTest;
simd_type test = simd_or_v(negTest, posTest);
// num = negTest ? lowBound[k] : (posTest ? highBound[k] : force[k]);
simd_type num =
simd_or_v(simd_and_v((simd_type &)lowBound[k], negTest), simd_and_v((simd_type &)highBound[k], posTest));
num =
simd_or_v(simd_and_v(num, test), simd_andnot_v((simd_type &)force[k], test));
// den = test ? deltaForce[k] : dgFloat32 (1.0f);
simd_type den =
simd_or_v(simd_and_v((simd_type &)deltaForce[k], test), simd_andnot_v(one, test));
// test = test & (activeRow[k] > dgFloat32 (0.0f));
test = simd_and_v(test, simd_cmpgt_v((simd_type &)activeRow[k], zero));
// NEWTON_ASSERT (dgAbsf (den) > 1.0e-16f);
// ak = test ? (num - force[k]) / den : ak;
tmp0 =
simd_or_v(simd_div_v(simd_sub_v(num, (simd_type &)force[k]), den), simd_andnot_v(tmp0, test));
// ak = (ak < dgFloat32 (1.0e-8f)) ? dgFloat32 (0.0f) : ak;
tmp0 = simd_and_v(tmp0, simd_cmpgt_v(tmp0, tol_pos_1eNeg8));
// clampedForceIndex = test ? k : clampedForceIndex;
minClampIndex =
simd_or_v(simd_and_v(min_index, test), simd_andnot_v(minClampIndex, test));
min_index = simd_add_v(min_index, min_index_step);
// clampedForceIndexValue = test ? num : clampedForceIndexValue;
campedIndexValue =
simd_or_v(simd_and_v(num, test), simd_andnot_v(campedIndexValue, test));
}
simd_type tmp2 = simd_move_hl_v(tmp0, tmp0);
simd_type tmp3 = simd_cmplt_v(tmp0, tmp2);
tmp0 = simd_min_v(tmp0, tmp2);
minClampIndex =
simd_or_v(simd_and_v(minClampIndex, tmp3), simd_andnot_v(simd_move_hl_v(minClampIndex, minClampIndex), tmp3));
campedIndexValue =
simd_or_v(simd_and_v(campedIndexValue, tmp3), simd_andnot_v(simd_move_hl_v(campedIndexValue, campedIndexValue), tmp3));
tmp2 = simd_permut_v(tmp0, tmp0, PURMUT_MASK(0, 0, 0, 1));
tmp3 = simd_cmplt_s(tmp0, tmp2);
tmp0 = simd_min_s(tmp0, tmp2);
minClampIndex =
simd_or_v(simd_and_v(minClampIndex, tmp3), simd_andnot_v(simd_permut_v(minClampIndex, minClampIndex, PURMUT_MASK(0, 0, 0, 1)), tmp3));
campedIndexValue =
simd_or_v(simd_and_v(campedIndexValue, tmp3), simd_andnot_v(simd_permut_v(campedIndexValue, campedIndexValue, PURMUT_MASK(0, 0, 0, 1)), tmp3));
dgFloat32 ak;
simd_store_s(tmp0, &ak);
dgInt32 clampedForceIndex = simd_store_is(minClampIndex);
dgFloat32 clampedForceIndexValue;
simd_store_s(campedIndexValue, &clampedForceIndexValue);
tmp2 = zero;
tmp0 = simd_permut_v(tmp0, tmp0, PURMUT_MASK(0, 0, 0, 0));
// if (ak == dgFloat32 (0.0f)) {
if (ak == dgFloat32(0.0f) && (clampedForceIndex != -1)) {
NEWTON_ASSERT(clampedForceIndex != -1);
// akNum = dgFloat32 (0.0f);
// accNorm = dgFloat32(0.0f);
simd_type tmp1 = zero;
// tmp2 = zero;
simd_type tmp3 = zero;
// for (k = 0; k < count; k ++) {
// for (k = 0; k < roundCount; k ++) {
activeRow[clampedForceIndex] = dgFloat32(0.0f);
deltaForce[clampedForceIndex] = dgFloat32(0.0f);
force[clampedForceIndex] = clampedForceIndexValue;
for (dgInt32 k = 0; k < roundCount; k += DG_SIMD_WORD_SIZE) {
// simd_type val_k;
// simd_type test_0;
// simd_type test_1;
// simd_type accel_k;
// simd_type force_k;
// bool test0;
// bool test1;
// dgFloat32 val;
simd_type accel_k = (simd_type &)accel[k];
simd_type force_k = (simd_type &)force[k];
// val = dgAbsf (lowBound[k] - force[k]);
simd_type val_k =
simd_and_v(simd_sub_v((simd_type &)lowBound[k], force_k), signMask);
// test0 = (val < dgFloat32 (1.0e-5f)) & (accel[k] < dgFloat32 (0.0f));
simd_type test_0 =
simd_and_v(simd_cmplt_v(val_k, tol_pos_1eNeg5), simd_cmplt_v(accel_k, zero));
// val = dgAbsf (highBound[k] - force[k]);
val_k =
simd_and_v(simd_sub_v((simd_type &)highBound[k], force_k), signMask);
// test1 = (val < dgFloat32 (1.0e-5f)) & (accel[k] > dgFloat32 (0.0f));
simd_type test_1 =
simd_and_v(simd_cmplt_v(val_k, tol_pos_1eNeg5), simd_cmpgt_v(accel_k, zero));
// force[k] = test0 ? lowBound[k] : (test1 ? highBound[k] : force[k]);
// val_k = simd_or_v (simd_and_v ((simd_type&)lowBound[k], test_0), simd_and_v ((simd_type&)highBound[k], test_1));
//(simd_type&) force[k] = simd_or_v (simd_and_v (val_k, test_2) , simd_andnot_v (force_k, test_2));
(simd_type &)force[k] =
simd_min_v((simd_type &)highBound[k], simd_max_v((simd_type &)force[k], (simd_type &)lowBound[k]));
// activeRow[k] *= (test0 | test1) ? dgFloat32 (0.0f) : dgFloat32 (1.0f);
// test_2 = simd_or_v (test_0, test_1);
(simd_type &)activeRow[k] =
simd_mul_v((simd_type &)activeRow[k], simd_andnot_v(one, simd_or_v(test_0, test_1)));
// deltaForce[k] = accel[k] * invDJMinvJt[k] * activeRow[k];
(simd_type &)deltaForce[k] =
simd_mul_v(accel_k, simd_mul_v((simd_type &)invDJMinvJt[k], (simd_type &)activeRow[k]));
// akNum += accel[k] * deltaForce[k];
tmp1 = simd_mul_add_v(tmp1, (simd_type &)deltaForce[k], accel_k);
// accNorm = GetMax (dgAbsf (accel[k] * activeRow[k]), accNorm);
tmp2 =
simd_max_v(tmp2, simd_and_v(simd_mul_v(accel_k, (simd_type &)activeRow[k]), signMask));
// maxPases += 1;
tmp3 = simd_add_v(tmp3, (simd_type &)activeRow[k]);
}
tmp1 = simd_add_v(tmp1, simd_move_hl_v(tmp1, tmp1));
tmp1 =
simd_add_s(tmp1, simd_permut_v(tmp1, tmp1, PURMUT_MASK(0, 0, 0, 1)));
simd_store_s(tmp1, &akNum);
tmp2 = simd_max_v(tmp2, simd_move_hl_v(tmp2, tmp2));
tmp2 =
simd_max_s(tmp2, simd_permut_v(tmp2, tmp2, PURMUT_MASK(0, 0, 0, 1)));
simd_store_s(tmp2, &accNorm);
i = -1;
// maxPasses = GetMax (maxPasses - 1, 1);
tmp3 = simd_add_v(tmp3, simd_move_hl_v(tmp3, tmp3));
maxPasses =
simd_store_is(simd_add_s(tmp3, simd_permut_v(tmp3, tmp3, PURMUT_MASK(0, 0, 0, 1))));
} else if (clampedForceIndex >= 0) {
// akNum = dgFloat32(0.0f);
// accNorm = dgFloat32(0.0f);
tmp1 = zero;
activeRow[clampedForceIndex] = dgFloat32(0.0f);
// for (k = 0; k < count; k ++) {
for (dgInt32 k = 0; k < roundCount; k += DG_SIMD_WORD_SIZE) {
// m_force[k] += ak * m_deltaForce[k];
// m_accel[k] -= ak * m_deltaAccel[k];
(simd_type &)force[k] =
simd_mul_add_v((simd_type &)force[k], tmp0, (simd_type &)deltaForce[k]);
(simd_type &)accel[k] =
simd_mul_sub_v((simd_type &)accel[k], tmp0, (simd_type &)deltaAccel[k]);
// accNorm = GetMax (dgAbsf (m_accel[k] * activeRow[k]), accNorm);
tmp2 =
simd_max_v(tmp2, simd_and_v(simd_mul_v((simd_type &)accel[k], (simd_type &)activeRow[k]), signMask));
//NEWTON_ASSERT (dgCheckFloat(m_force[k]));
//NEWTON_ASSERT (dgCheckFloat(m_accel[k]));
// m_deltaForce[k] = m_accel[k] * m_invDJMinvJt[k] * activeRow[k];
(simd_type &)deltaForce[k] =
simd_mul_v((simd_type &)accel[k], simd_mul_v((simd_type &)invDJMinvJt[k], (simd_type &)activeRow[k]));
// akNum += m_deltaForce[k] * m_accel[k];
tmp1 =
simd_mul_add_v(tmp1, (simd_type &)deltaForce[k], (simd_type &)accel[k]);
}
tmp1 = simd_add_v(tmp1, simd_move_hl_v(tmp1, tmp1));
tmp1 =
simd_add_s(tmp1, simd_permut_v(tmp1, tmp1, PURMUT_MASK(0, 0, 0, 1)));
simd_store_s(tmp1, &akNum);
tmp2 = simd_max_v(tmp2, simd_move_hl_v(tmp2, tmp2));
tmp2 =
simd_max_s(tmp2, simd_permut_v(tmp2, tmp2, PURMUT_MASK(0, 0, 0, 1)));
simd_store_s(tmp2, &accNorm);
force[clampedForceIndex] = clampedForceIndexValue;
i = -1;
maxPasses = GetMax(maxPasses - 1, 1);
} else {
// accNorm = dgFloat32(0.0f);
// for (k = 0; k < count; k ++) {
for (dgInt32 k = 0; k < roundCount; k += DG_SIMD_WORD_SIZE) {
// m_force[k] += ak * m_deltaForce[k];
// m_accel[k] -= ak * m_deltaAccel[k];
(simd_type &)force[k] =
simd_mul_add_v((simd_type &)force[k], tmp0, (simd_type &)deltaForce[k]);
(simd_type &)accel[k] =
simd_mul_sub_v((simd_type &)accel[k], tmp0, (simd_type &)deltaAccel[k]);
// accNorm = GetMax (dgAbsf (m_accel[k] * activeRow[k]), accNorm);
tmp2 =
simd_max_v(tmp2, simd_and_v(simd_mul_v((simd_type &)accel[k], (simd_type &)activeRow[k]), signMask));
}
tmp2 = simd_max_v(tmp2, simd_move_hl_v(tmp2, tmp2));
tmp2 =
simd_max_s(tmp2, simd_permut_v(tmp2, tmp2, PURMUT_MASK(0, 0, 0, 1)));
simd_store_s(tmp2, &accNorm);
if (accNorm > maxAccNorm) {
// akDen = akNum;
// akNum = dgFloat32(0.0f);
tmp1 = simd_set1(akNum);
tmp0 = zero;
// for (k = 0; k < count; k ++) {
for (dgInt32 k = 0; k < roundCount; k += DG_SIMD_WORD_SIZE) {
// m_deltaAccel[k] = m_accel[k] * m_invDJMinvJt[k] * activeRow[k];
(simd_type &)deltaAccel[k] =
simd_mul_v((simd_type &)accel[k], simd_mul_v((simd_type &)invDJMinvJt[k], (simd_type &)activeRow[k]));
// akNum += m_accel[k] * m_deltaAccel[k];
tmp0 =
simd_mul_add_v(tmp0, (simd_type &)accel[k], (simd_type &)deltaAccel[k]);
}
tmp0 = simd_add_v(tmp0, simd_move_hl_v(tmp0, tmp0));
tmp0 =
simd_add_s(tmp0, simd_permut_v(tmp0, tmp0, PURMUT_MASK(0, 0, 0, 1)));
simd_store_s(tmp0, &akNum);
//NEWTON_ASSERT (bk > dgFloat32(0.0f));
// bk = GetMax (bk, dgFloat32 (1.0e-17f));
tmp1 = simd_max_s(tmp1, simd_set1(dgFloat32(1.0e-17f)));
// ak = dgFloat32 (akNum / akDen);
tmp0 = simd_div_s(tmp0, tmp1);
tmp0 = simd_permut_v(tmp0, tmp0, PURMUT_MASK(0, 0, 0, 0));
// for (k = 0; k < count; k ++) {
for (dgInt32 k = 0; k < roundCount; k += DG_SIMD_WORD_SIZE) {
// m_deltaForce[k] = m_deltaAccel[k] + ak * m_deltaForce[k];
(simd_type &)deltaForce[k] =
simd_mul_add_v((simd_type &)deltaAccel[k], tmp0, (simd_type &)deltaForce[k]);
}
}
}
}
#else
#endif
}
void dgJacobianMemory::CalculateSimpleBodyReactionsForces(
dgFloat32 maxAccNorm) const {
dgFloat32 accel[DG_CONSTRAINT_MAX_ROWS];
dgFloat32 activeRow[DG_CONSTRAINT_MAX_ROWS];
dgFloat32 lowBound[DG_CONSTRAINT_MAX_ROWS];
dgFloat32 highBound[DG_CONSTRAINT_MAX_ROWS];
dgFloat32 deltaForce[DG_CONSTRAINT_MAX_ROWS];
dgFloat32 deltaAccel[DG_CONSTRAINT_MAX_ROWS];
// dgFloat32* const accel = m_accel;
dgFloat32 *const force = m_force;
const dgJacobianPair *const Jt = m_Jt;
const dgJacobianPair *const JMinv = m_JMinv;
const dgFloat32 *const diagDamp = m_diagDamp;
const dgFloat32 *const invDJMinvJt = m_invDJMinvJt;
const dgFloat32 *const lowerFriction = m_lowerBoundFrictionCoefficent;
const dgFloat32 *const upperFriction = m_upperBoundFrictionCoefficent;
const dgInt32 *const normalForceIndex = m_normalForceIndex;
const dgFloat32 *const coordenateAccel = m_coordenateAccel;
const dgJointInfo *const constraintArray = m_constraintArray;
dgVector zero(dgFloat32(0.0f), dgFloat32(0.0f), dgFloat32(0.0f),
dgFloat32(0.0f));
dgInt32 count = constraintArray[0].m_autoPaircount;
NEWTON_ASSERT(constraintArray[0].m_autoPairstart == 0);
// ak = dgFloat32 (3.0f) * dgSqrt((body1->m_accel % body1->m_accel) + (body0->m_accel % body0->m_accel)) / dgFloat32 (count);
// ak = dgSqrt((body1->m_accel % body1->m_accel) + (body0->m_accel % body0->m_accel));
// dTrace (("%f\n", ak));
dgInt32 maxPasses = count;
for (dgInt32 i = 0; i < count; i++) {
// k = bilateralForceBounds[i].m_normalIndex;
dgInt32 k = normalForceIndex[i];
// force = (k >= 0) ? m_force[k] : dgFloat32 (1.0f);
NEWTON_ASSERT(
((k < 0) && (force[k] == dgFloat32(1.0f))) || ((k >= 0) && (force[k] >= dgFloat32(0.0f))));
dgFloat32 val = force[k];
// if (dgAbsf (val) < dgFloat32 (1.0e-2f)) {
// val = ak;
// }
// lowBound[i] = val * bilateralForceBounds[i].m_low;
// highBound[i] = val * bilateralForceBounds[i].m_upper;
lowBound[i] = val * lowerFriction[i];
highBound[i] = val * upperFriction[i];
activeRow[i] = dgFloat32(1.0f);
if (force[i] < lowBound[i]) {
maxPasses--;
force[i] = lowBound[i];
activeRow[i] = dgFloat32(0.0f);
} else if (force[i] > highBound[i]) {
maxPasses--;
force[i] = highBound[i];
activeRow[i] = dgFloat32(0.0f);
}
}
dgJacobian y0;
dgJacobian y1;
y0.m_linear = zero;
y0.m_angular = zero;
y1.m_linear = zero;
y1.m_angular = zero;
for (dgInt32 i = 0; i < count; i++) {
dgFloat32 val = force[i];
y0.m_linear += Jt[i].m_jacobian_IM0.m_linear.Scale(val);
y0.m_angular += Jt[i].m_jacobian_IM0.m_angular.Scale(val);
y1.m_linear += Jt[i].m_jacobian_IM1.m_linear.Scale(val);
y1.m_angular += Jt[i].m_jacobian_IM1.m_angular.Scale(val);
}
dgFloat32 akNum = dgFloat32(0.0f);
dgFloat32 accNorm = dgFloat32(0.0f);
for (dgInt32 i = 0; i < count; i++) {
dgVector acc(JMinv[i].m_jacobian_IM0.m_linear.CompProduct(y0.m_linear));
acc += JMinv[i].m_jacobian_IM0.m_angular.CompProduct(y0.m_angular);
acc += JMinv[i].m_jacobian_IM1.m_linear.CompProduct(y1.m_linear);
acc += JMinv[i].m_jacobian_IM1.m_angular.CompProduct(y1.m_angular);
accel[i] = coordenateAccel[i] - acc.m_x - acc.m_y - acc.m_z - force[i] * diagDamp[i];
deltaForce[i] = accel[i] * invDJMinvJt[i] * activeRow[i];
akNum += accel[i] * deltaForce[i];
accNorm = GetMax(dgAbsf(accel[i] * activeRow[i]), accNorm);
}
dgFloat32 clampedForceIndexValue = dgFloat32(0.0f);
for (dgInt32 i = 0; (i < maxPasses) && (accNorm > maxAccNorm); i++) {
// MatrixTimeVector (forceRows, m_deltaForce, m_deltaAccel);
y0.m_linear = zero;
y0.m_angular = zero;
y1.m_linear = zero;
y1.m_angular = zero;
for (dgInt32 k = 0; k < count; k++) {
dgFloat32 val = deltaForce[k];
y0.m_linear += Jt[k].m_jacobian_IM0.m_linear.Scale(val);
y0.m_angular += Jt[k].m_jacobian_IM0.m_angular.Scale(val);
y1.m_linear += Jt[k].m_jacobian_IM1.m_linear.Scale(val);
y1.m_angular += Jt[k].m_jacobian_IM1.m_angular.Scale(val);
}
dgFloat32 akDen = dgFloat32(0.0f);
for (dgInt32 k = 0; k < count; k++) {
dgVector acc(JMinv[k].m_jacobian_IM0.m_linear.CompProduct(y0.m_linear));
acc += JMinv[k].m_jacobian_IM0.m_angular.CompProduct(y0.m_angular);
acc += JMinv[k].m_jacobian_IM1.m_linear.CompProduct(y1.m_linear);
acc += JMinv[k].m_jacobian_IM1.m_angular.CompProduct(y1.m_angular);
// m_accel[i0] = m_coordenateAccel[i0] - acc.m_x - acc.m_y - acc.m_z - m_force[i0] * m_diagDamp[i0];
deltaAccel[k] = acc.m_x + acc.m_y + acc.m_z + deltaForce[k] * diagDamp[k];
akDen += deltaAccel[k] * deltaForce[k];
}
NEWTON_ASSERT(akDen > dgFloat32(0.0f));
akDen = GetMax(akDen, dgFloat32(1.0e-16f));
NEWTON_ASSERT(dgAbsf(akDen) >= dgFloat32(1.0e-16f));
dgFloat32 ak = akNum / akDen;
dgInt32 clampedForceIndex = -1;
for (dgInt32 k = 0; k < count; k++) {
if (activeRow[k]) {
dgFloat32 val = force[k] + ak * deltaForce[k];
if (deltaForce[k] < dgFloat32(-1.0e-16f)) {
if (val < lowBound[k]) {
ak = GetMax((lowBound[k] - force[k]) / deltaForce[k],
dgFloat32(0.0f));
clampedForceIndex = k;
clampedForceIndexValue = lowBound[k];
if (ak < dgFloat32(1.0e-8f)) {
ak = dgFloat32(0.0f);
break;
}
}
} else if (deltaForce[k] > dgFloat32(1.0e-16f)) {
if (val >= highBound[k]) {
ak = GetMax((highBound[k] - force[k]) / deltaForce[k],
dgFloat32(0.0f));
;
clampedForceIndex = k;
clampedForceIndexValue = highBound[k];
if (ak < dgFloat32(1.0e-8f)) {
ak = dgFloat32(0.0f);
break;
}
}
}
}
}
// if (ak == dgFloat32 (0.0f)) {
if (ak == dgFloat32(0.0f) && (clampedForceIndex != -1)) {
NEWTON_ASSERT(clampedForceIndex != -1);
akNum = dgFloat32(0.0f);
accNorm = dgFloat32(0.0f);
activeRow[clampedForceIndex] = dgFloat32(0.0f);
deltaForce[clampedForceIndex] = dgFloat32(0.0f);
force[clampedForceIndex] = clampedForceIndexValue;
for (dgInt32 k = 0; k < count; k++) {
if (activeRow[k]) {
dgFloat32 val;
val = lowBound[k] - force[k];
if ((dgAbsf(val) < dgFloat32(1.0e-5f)) && (accel[k] < dgFloat32(0.0f))) {
force[k] = lowBound[k];
activeRow[k] = dgFloat32(0.0f);
deltaForce[k] = dgFloat32(0.0f);
} else {
val = highBound[k] - force[k];
if ((dgAbsf(val) < dgFloat32(1.0e-5f)) && (accel[k] > dgFloat32(0.0f))) {
force[k] = highBound[k];
activeRow[k] = dgFloat32(0.0f);
deltaForce[k] = dgFloat32(0.0f);
} else {
NEWTON_ASSERT(activeRow[k] > dgFloat32(0.0f));
deltaForce[k] = accel[k] * invDJMinvJt[k];
akNum += accel[k] * deltaForce[k];
accNorm = GetMax(dgAbsf(accel[k]), accNorm);
}
}
}
}
i = -1;
maxPasses = GetMax(maxPasses - 1, dgInt32(1));
} else if (clampedForceIndex >= 0) {
akNum = dgFloat32(0.0f);
accNorm = dgFloat32(0.0f);
activeRow[clampedForceIndex] = dgFloat32(0.0f);
for (dgInt32 k = 0; k < count; k++) {
force[k] += ak * deltaForce[k];
accel[k] -= ak * deltaAccel[k];
accNorm = GetMax(dgAbsf(accel[k] * activeRow[k]), accNorm);
NEWTON_ASSERT(dgCheckFloat(force[k]));
NEWTON_ASSERT(dgCheckFloat(accel[k]));
deltaForce[k] = accel[k] * invDJMinvJt[k] * activeRow[k];
akNum += deltaForce[k] * accel[k];
}
force[clampedForceIndex] = clampedForceIndexValue;
i = -1;
maxPasses = GetMax(maxPasses - 1, dgInt32(1));
} else {
accNorm = dgFloat32(0.0f);
for (dgInt32 k = 0; k < count; k++) {
force[k] += ak * deltaForce[k];
accel[k] -= ak * deltaAccel[k];
accNorm = GetMax(dgAbsf(accel[k] * activeRow[k]), accNorm);
NEWTON_ASSERT(dgCheckFloat(force[k]));
NEWTON_ASSERT(dgCheckFloat(accel[k]));
}
if (accNorm > maxAccNorm) {
akDen = akNum;
akNum = dgFloat32(0.0f);
for (dgInt32 k = 0; k < count; k++) {
deltaAccel[k] = accel[k] * invDJMinvJt[k] * activeRow[k];
akNum += accel[k] * deltaAccel[k];
}
NEWTON_ASSERT(akDen > dgFloat32(0.0f));
akDen = GetMax(akDen, dgFloat32(1.0e-17f));
ak = dgFloat32(akNum / akDen);
for (dgInt32 k = 0; k < count; k++) {
deltaForce[k] = deltaAccel[k] + ak * deltaForce[k];
}
}
}
}
}
void dgJacobianMemory::CalculateForcesSimulationModeSimd(
dgFloat32 maxAccNorm) const {
#ifdef DG_BUILD_SIMD_CODE
dgInt32 passes;
dgInt32 prevJoint;
dgInt32 maxPasses;
dgInt32 forceRows;
dgInt32 roundBodyCount;
dgInt32 totalPassesCount;
dgFloatSign tmpIndex;
dgFloat32 akNum;
dgFloat32 accNorm;
dgFloat32 *const force = m_force;
dgFloat32 *const accel = m_accel;
const dgJacobianPair *const Jt = m_Jt;
dgFloat32 *const deltaAccel = m_deltaAccel;
dgFloat32 *const deltaForce = m_deltaForce;
const dgJacobianPair *const JMinv = m_JMinv;
const dgFloat32 *const diagDamp = m_diagDamp;
const dgFloat32 *const invDJMinvJt = m_invDJMinvJt;
dgJacobian *const internalForces = m_internalForces;
dgInt32 *const normalForceIndex = m_normalForceIndex;
dgJointInfo *const constraintArray = m_constraintArray;
const dgFloat32 *const coordenateAccel = m_coordenateAccel;
dgFloat32 *const lowerForceBound = m_lowerBoundFrictionCoefficent;
dgFloat32 *const upperForceBound = m_upperBoundFrictionCoefficent;
simd_type forceStepPtr[DG_CONSTRAINT_MAX_ROWS / DG_SIMD_WORD_SIZE];
dgFloat32 *const forceStep = (dgFloat32 *)forceStepPtr;
simd_type one;
simd_type zero;
simd_type signMask;
simd_type tol_pos_1eNeg8;
simd_type tol_pos_1eNeg16;
simd_type tol_neg_1eNeg16;
zero = simd_set1(dgFloat32(0.0f));
roundBodyCount = m_bodyCount & -4;
for (dgInt32 i = 0; i < roundBodyCount; i += 4) {
((simd_type &)internalForces[i + 0].m_linear) = zero;
((simd_type &)internalForces[i + 0].m_angular) = zero;
((simd_type &)internalForces[i + 1].m_linear) = zero;
((simd_type &)internalForces[i + 1].m_angular) = zero;
((simd_type &)internalForces[i + 2].m_linear) = zero;
((simd_type &)internalForces[i + 2].m_angular) = zero;
((simd_type &)internalForces[i + 3].m_linear) = zero;
((simd_type &)internalForces[i + 3].m_angular) = zero;
}
for (dgInt32 i = roundBodyCount; i < m_bodyCount; i++) {
((simd_type &)internalForces[i].m_linear) = zero;
((simd_type &)internalForces[i].m_angular) = zero;
}
for (dgInt32 i = 0; i < m_jointCount; i++) {
dgInt32 j;
dgInt32 m0;
dgInt32 m1;
dgInt32 index;
dgInt32 first;
dgInt32 count;
simd_type y0_linear;
simd_type y0_angular;
simd_type y1_linear;
simd_type y1_angular;
first = constraintArray[i].m_autoPairstart;
count = constraintArray[i].m_autoPairActiveCount;
m0 = constraintArray[i].m_m0;
m1 = constraintArray[i].m_m1;
y0_linear = zero;
y0_angular = zero;
y1_linear = zero;
y1_angular = zero;
for (j = 0; j < count; j++) {
simd_type tmp0;
index = first + j;
// force = m_force[index];
tmp0 = simd_set1(force[index]);
// y0.m_linear += m_Jt[index].m_jacobian_IM0.m_linear.Scale (m_force[index]);
// y0.m_angular += m_Jt[index].m_jacobian_IM0.m_angular.Scale (m_force[index]);
// y1.m_linear += m_Jt[index].m_jacobian_IM1.m_linear.Scale (m_force[index]);
// y1.m_angular += m_Jt[index].m_jacobian_IM1.m_angular.Scale (m_force[index]);
y0_linear =
simd_mul_add_v(y0_linear, (simd_type &)Jt[index].m_jacobian_IM0.m_linear, tmp0);
y0_angular =
simd_mul_add_v(y0_angular, (simd_type &)Jt[index].m_jacobian_IM0.m_angular, tmp0);
y1_linear =
simd_mul_add_v(y1_linear, (simd_type &)Jt[index].m_jacobian_IM1.m_linear, tmp0);
y1_angular =
simd_mul_add_v(y1_angular, (simd_type &)Jt[index].m_jacobian_IM1.m_angular, tmp0);
}
// m_internalForces[j0] = y0;
// m_internalForces[j1] = y1;
(simd_type &)internalForces[m0].m_linear =
simd_add_v((simd_type &)internalForces[m0].m_linear, y0_linear);
(simd_type &)internalForces[m0].m_angular =
simd_add_v((simd_type &)internalForces[m0].m_angular, y0_angular);
(simd_type &)internalForces[m1].m_linear =
simd_add_v((simd_type &)internalForces[m1].m_linear, y1_linear);
(simd_type &)internalForces[m1].m_angular =
simd_add_v((simd_type &)internalForces[m1].m_angular, y1_angular);
}
for (dgInt32 i = 0; i < DG_CONSTRAINT_MAX_ROWS; i += (DG_SIMD_WORD_SIZE * 4)) {
(simd_type &)forceStep[i + DG_SIMD_WORD_SIZE * 0] = zero;
(simd_type &)forceStep[i + DG_SIMD_WORD_SIZE * 1] = zero;
(simd_type &)forceStep[i + DG_SIMD_WORD_SIZE * 2] = zero;
(simd_type &)forceStep[i + DG_SIMD_WORD_SIZE * 3] = zero;
}
maxPasses = 4;
prevJoint = 0;
accNorm = maxAccNorm * dgFloat32(2.0f);
for (passes = 0; (passes < maxPasses) && (accNorm > maxAccNorm); passes++) {
accNorm = dgFloat32(0.0f);
for (dgInt32 currJoint = 0; currJoint < m_jointCount; currJoint++) {
dgInt32 m0;
dgInt32 m1;
dgInt32 index;
// dgInt32 currJoint;
dgInt32 rowsCount;
dgFloat32 jointAccel;
simd_type y0_linear;
simd_type y0_angular;
simd_type y1_linear;
simd_type y1_angular;
index = constraintArray[prevJoint].m_autoPairstart;
rowsCount = constraintArray[prevJoint].m_autoPaircount;
m0 = constraintArray[prevJoint].m_m0;
m1 = constraintArray[prevJoint].m_m1;
y0_linear = zero;
y0_angular = zero;
y1_linear = zero;
y1_angular = zero;
for (dgInt32 i = 0; i < rowsCount; i++) {
// dgFloat32 deltaForce;
// deltaForce = forceStep[i];
// y0.m_linear += Jt[index].m_jacobian_IM0.m_linear.Scale (deltaForce);
// y0.m_angular += Jt[index].m_jacobian_IM0.m_angular.Scale (deltaForce);
// y1.m_linear += Jt[index].m_jacobian_IM1.m_linear.Scale (deltaForce);
// y1.m_angular += Jt[index].m_jacobian_IM1.m_angular.Scale (deltaForce);
simd_type deltaForce;
deltaForce = simd_set1(forceStep[i]);
y0_linear =
simd_mul_add_v(y0_linear, (simd_type &)Jt[index].m_jacobian_IM0.m_linear, deltaForce);
y0_angular =
simd_mul_add_v(y0_angular, (simd_type &)Jt[index].m_jacobian_IM0.m_angular, deltaForce);
y1_linear =
simd_mul_add_v(y1_linear, (simd_type &)Jt[index].m_jacobian_IM1.m_linear, deltaForce);
y1_angular =
simd_mul_add_v(y1_angular, (simd_type &)Jt[index].m_jacobian_IM1.m_angular, deltaForce);
index++;
}
(simd_type &)internalForces[m0].m_linear =
simd_add_v((simd_type &)internalForces[m0].m_linear, y0_linear);
(simd_type &)internalForces[m0].m_angular =
simd_add_v((simd_type &)internalForces[m0].m_angular, y0_angular);
(simd_type &)internalForces[m1].m_linear =
simd_add_v((simd_type &)internalForces[m1].m_linear, y1_linear);
(simd_type &)internalForces[m1].m_angular =
simd_add_v((simd_type &)internalForces[m1].m_angular, y1_angular);
// currJoint = jointRemapArray[j];
index = constraintArray[currJoint].m_autoPairstart;
rowsCount = constraintArray[currJoint].m_autoPaircount;
m0 = constraintArray[currJoint].m_m0;
m1 = constraintArray[currJoint].m_m1;
// y0 = internalForces[m0];
// y1 = internalForces[m1];
y0_linear = (simd_type &)internalForces[m0].m_linear;
y0_angular = (simd_type &)internalForces[m0].m_angular;
y1_linear = (simd_type &)internalForces[m1].m_linear;
y1_angular = (simd_type &)internalForces[m1].m_angular;
for (dgInt32 i = 0; i < rowsCount; i++) {
simd_type tmpAccel;
// dgVector acc (JMinv[index].m_jacobian_IM0.m_linear.CompProduct(y0.m_linear));
// acc += JMinv[index].m_jacobian_IM0.m_angular.CompProduct (y0.m_angular);
// acc += JMinv[index].m_jacobian_IM1.m_linear.CompProduct (y1.m_linear);
// acc += JMinv[index].m_jacobian_IM1.m_angular.CompProduct (y1.m_angular);
tmpAccel =
simd_mul_v((simd_type &)JMinv[index].m_jacobian_IM0.m_linear, y0_linear);
tmpAccel =
simd_mul_add_v(tmpAccel, (simd_type &)JMinv[index].m_jacobian_IM0.m_angular, y0_angular);
tmpAccel =
simd_mul_add_v(tmpAccel, (simd_type &)JMinv[index].m_jacobian_IM1.m_linear, y1_linear);
tmpAccel =
simd_mul_add_v(tmpAccel, (simd_type &)JMinv[index].m_jacobian_IM1.m_angular, y1_angular);
// accel[i] = coordenateAccel[index] - acc.m_x - acc.m_y - acc.m_z - force[index] * diagDamp[index];
tmpAccel = simd_add_v(tmpAccel, simd_move_hl_v(tmpAccel, tmpAccel));
tmpAccel =
simd_add_s(tmpAccel, simd_permut_v(tmpAccel, tmpAccel, PURMUT_MASK(3, 3, 3, 1)));
tmpAccel =
simd_mul_add_s(tmpAccel, simd_load_s(force[index]), simd_load_s(diagDamp[index]));
simd_store_s(simd_sub_s(simd_load_s(coordenateAccel[index]), tmpAccel),
&accel[i]);
index++;
}
jointAccel = CalculateJointForcesSimd(currJoint, forceStep, maxAccNorm);
accNorm = GetMax(accNorm, jointAccel);
prevJoint = currJoint;
}
}
one = simd_set1(dgFloat32(1.0f));
for (dgInt32 i = 0; i < m_jointCount; i++) {
dgInt32 first;
dgInt32 count;
dgInt32 index;
dgInt32 roundCount;
first = constraintArray[i].m_autoPairstart;
count = constraintArray[i].m_autoPairActiveCount;
roundCount = count & (-DG_SIMD_WORD_SIZE);
if (roundCount != count) {
roundCount += DG_SIMD_WORD_SIZE;
for (dgInt32 k = count; k < roundCount; k++) {
dgInt32 j;
j = first + k;
// force[j] = dgFloat32 (0.0f);
// accel[j] -= dgFloat32 (0.0f);
// deltaAccel[j] = dgFloat32 (0.0f);
// deltaForce[j] = dgFloat32 (0.0f);
normalForceIndex[j] = -1;
simd_store_s(zero, &force[j]);
simd_store_s(zero, &accel[j]);
simd_store_s(zero, &deltaAccel[j]);
simd_store_s(zero, &deltaForce[j]);
simd_store_s(one, &lowerForceBound[j]);
simd_store_s(zero, &upperForceBound[j]);
}
}
for (dgInt32 k = 0; k < count; k += DG_SIMD_WORD_SIZE) {
simd_type normalForce;
index = first + k;
// dgBilateralBounds& forceBounds = bilateralForceBounds[index];
// j = normalForceIndex[index];
//NEWTON_ASSERT (((j < 0) && (force[j] == dgFloat32 (1.0f))) || ((j >= 0) && (force[j] >= dgFloat32 (0.0f))));
// val = GetMax (force[j], dgFloat32(0.0f));
// val = force[j];
normalForce =
simd_move_lh_v(simd_pack_lo_v(simd_load_s(force[normalForceIndex[index + 0]]), simd_load_s(force[normalForceIndex[index + 1]])), simd_pack_lo_v(simd_load_s(force[normalForceIndex[index + 2]]), simd_load_s(force[normalForceIndex[index + 3]])));
// lowerForceBound[index] *= val;
// upperForceBound[index] *= val;
(simd_type &)lowerForceBound[index] =
simd_mul_v(normalForce, (simd_type &)lowerForceBound[index]);
(simd_type &)upperForceBound[index] =
simd_mul_v(normalForce, (simd_type &)upperForceBound[index]);
// force[index] = ClampValue(force[index], lowerForceBound[index], upperForceBound[index]);
(simd_type &)force[index] =
simd_min_v((simd_type &)upperForceBound[index], simd_max_v((simd_type &)force[index], (simd_type &)lowerForceBound[index]));
}
}
for (dgInt32 i = 0; i < roundBodyCount; i += 4) {
(simd_type &)internalForces[i + 0].m_linear = zero;
(simd_type &)internalForces[i + 0].m_angular = zero;
(simd_type &)internalForces[i + 1].m_linear = zero;
(simd_type &)internalForces[i + 1].m_angular = zero;
(simd_type &)internalForces[i + 2].m_linear = zero;
(simd_type &)internalForces[i + 2].m_angular = zero;
(simd_type &)internalForces[i + 3].m_linear = zero;
(simd_type &)internalForces[i + 3].m_angular = zero;
}
for (dgInt32 i = roundBodyCount; i < m_bodyCount; i++) {
(simd_type &)internalForces[i].m_linear = zero;
(simd_type &)internalForces[i].m_angular = zero;
}
for (dgInt32 i = 0; i < m_jointCount; i++) {
dgInt32 m0;
dgInt32 m1;
dgInt32 first;
dgInt32 count;
dgInt32 index;
// dgFloat32 val;
simd_type y0_linear;
simd_type y0_angular;
simd_type y1_linear;
simd_type y1_angular;
first = constraintArray[i].m_autoPairstart;
count = constraintArray[i].m_autoPairActiveCount;
m0 = constraintArray[i].m_m0;
m1 = constraintArray[i].m_m1;
// dgJacobian y0 (internalForces[k0]);
// dgJacobian y1 (internalForces[k1]);
y0_linear = zero;
y0_angular = zero;
y1_linear = zero;
y1_angular = zero;
for (dgInt32 j = 0; j < count; j++) {
simd_type tmp0;
index = j + first;
// val = force[index];
tmp0 = simd_set1(force[index]);
// y0.m_linear += Jt[index].m_jacobian_IM0.m_linear.Scale (val);
// y0.m_angular += Jt[index].m_jacobian_IM0.m_angular.Scale (val);
// y1.m_linear += Jt[index].m_jacobian_IM1.m_linear.Scale (val);
// y1.m_angular += Jt[index].m_jacobian_IM1.m_angular.Scale (val);
y0_linear =
simd_mul_add_v(y0_linear, (simd_type &)Jt[index].m_jacobian_IM0.m_linear, tmp0);
y0_angular =
simd_mul_add_v(y0_angular, (simd_type &)Jt[index].m_jacobian_IM0.m_angular, tmp0);
y1_linear =
simd_mul_add_v(y1_linear, (simd_type &)Jt[index].m_jacobian_IM1.m_linear, tmp0);
y1_angular =
simd_mul_add_v(y1_angular, (simd_type &)Jt[index].m_jacobian_IM1.m_angular, tmp0);
}
// internalForces[k0] = y0;
// internalForces[k1] = y1;
(simd_type &)internalForces[m0].m_linear =
simd_add_v((simd_type &)internalForces[m0].m_linear, y0_linear);
(simd_type &)internalForces[m0].m_angular =
simd_add_v((simd_type &)internalForces[m0].m_angular, y0_angular);
(simd_type &)internalForces[m1].m_linear =
simd_add_v((simd_type &)internalForces[m1].m_linear, y1_linear);
(simd_type &)internalForces[m1].m_angular =
simd_add_v((simd_type &)internalForces[m1].m_angular, y1_angular);
}
forceRows = 0;
akNum = dgFloat32(0.0f);
accNorm = dgFloat32(0.0f);
for (dgInt32 i = 0; i < m_jointCount; i++) {
dgInt32 m0;
dgInt32 m1;
dgInt32 first;
dgInt32 count;
dgInt32 index;
dgInt32 activeCount;
simd_type y0_linear;
simd_type y0_angular;
simd_type y1_linear;
simd_type y1_angular;
bool isClamped[DG_CONSTRAINT_MAX_ROWS];
first = constraintArray[i].m_autoPairstart;
count = constraintArray[i].m_autoPairActiveCount;
m0 = constraintArray[i].m_m0;
m1 = constraintArray[i].m_m1;
// const dgJacobian& y0 = internalForces[m0];
// const dgJacobian& y1 = internalForces[m1];
y0_linear = (simd_type &)internalForces[m0].m_linear;
y0_angular = (simd_type &)internalForces[m0].m_angular;
y1_linear = (simd_type &)internalForces[m1].m_linear;
y1_angular = (simd_type &)internalForces[m1].m_angular;
for (dgInt32 j = 0; j < count; j++) {
simd_type tmp0;
index = j + first;
// dgVector tmpAccel (JMinv[index].m_jacobian_IM0.m_linear.CompProduct(y0.m_linear));
// tmpAccel += JMinv[index].m_jacobian_IM0.m_angular.CompProduct(y0.m_angular);
// tmpAccel += JMinv[index].m_jacobian_IM1.m_linear.CompProduct(y1.m_linear);
// tmpAccel += JMinv[index].m_jacobian_IM1.m_angular.CompProduct(y1.m_angular);
tmp0 =
simd_mul_v((simd_type &)JMinv[index].m_jacobian_IM0.m_linear, y0_linear);
tmp0 =
simd_mul_add_v(tmp0, (simd_type &)JMinv[index].m_jacobian_IM0.m_angular, y0_angular);
tmp0 =
simd_mul_add_v(tmp0, (simd_type &)JMinv[index].m_jacobian_IM1.m_linear, y1_linear);
tmp0 =
simd_mul_add_v(tmp0, (simd_type &)JMinv[index].m_jacobian_IM1.m_angular, y1_angular);
// accel[index] = coordenateAccel[index] - (tmpAccel.m_x + tmpAccel.m_y + tmpAccel.m_z + force[index] * diagDamp[index]);
tmp0 = simd_add_v(tmp0, simd_move_hl_v(tmp0, tmp0));
tmp0 =
simd_add_s(tmp0, simd_permut_v(tmp0, tmp0, PURMUT_MASK(3, 3, 3, 1)));
tmp0 =
simd_sub_s(simd_load_s(coordenateAccel[index]), simd_mul_add_s(tmp0, simd_load_s(force[index]), simd_load_s(diagDamp[index])));
simd_store_s(tmp0, &accel[index]);
}
activeCount = 0;
for (dgInt32 j = 0; j < count; j++) {
dgFloat32 val;
index = j + first;
val = lowerForceBound[index] - force[index];
if ((dgAbsf(val) < dgFloat32(1.0e-5f)) && (accel[index] < dgFloat32(0.0f))) {
force[index] = lowerForceBound[index];
accel[index] = dgFloat32(0.0f);
deltaForce[index] = dgFloat32(0.0f);
deltaAccel[index] = dgFloat32(0.0f);
isClamped[j] = true;
} else {
val = upperForceBound[index] - force[index];
if ((dgAbsf(val) < dgFloat32(1.0e-5f)) && (accel[index] > dgFloat32(0.0f))) {
force[index] = upperForceBound[index];
accel[index] = dgFloat32(0.0f);
deltaForce[index] = dgFloat32(0.0f);
deltaAccel[index] = dgFloat32(0.0f);
isClamped[j] = true;
} else {
forceRows++;
activeCount++;
deltaForce[index] = accel[index] * invDJMinvJt[index];
akNum += accel[index] * deltaForce[index];
accNorm = GetMax(dgAbsf(accel[index]), accNorm);
isClamped[j] = false;
}
}
}
if (activeCount < count) {
dgInt32 i0;
dgInt32 i1;
i0 = 0;
i1 = count - 1;
constraintArray[i].m_autoPairActiveCount = activeCount;
do {
while ((i0 <= i1) && !isClamped[i0])
i0++;
while ((i0 <= i1) && isClamped[i1])
i1--;
if (i0 < i1) {
SwapRowsSimd(first + i0, first + i1);
i0++;
i1--;
}
} while (i0 < i1);
}
}
// maxPasses = 0x7fffffff;
// signMask = simd_set1((dgFloat32&) maxPasses);
tmpIndex.m_integer.m_iVal = 0x7fffffff;
signMask = simd_set1(tmpIndex.m_fVal);
zero = simd_set1(dgFloat32(0.0f));
tol_pos_1eNeg8 = simd_set1(dgFloat32(1.0e-8f));
tol_pos_1eNeg16 = simd_set1(dgFloat32(1.0e-16f));
tol_neg_1eNeg16 = simd_set1(dgFloat32(-1.0e-16f));
maxPasses = forceRows;
totalPassesCount = 0;
for (passes = 0; (passes < maxPasses) && (accNorm > maxAccNorm); passes++) {
dgInt32 clampedForceIndex;
dgInt32 clampedForceJoint;
// dgFloat32 ak;
// dgFloat32 akDen;
// dgFloat32 clampedForceIndexValue;
simd_type akSimd;
simd_type akDenSimd;
simd_type clampedForceIndexValue;
for (dgInt32 i = 0; i < roundBodyCount; i += 4) {
(simd_type &)internalForces[i + 0].m_linear = zero;
(simd_type &)internalForces[i + 0].m_angular = zero;
(simd_type &)internalForces[i + 1].m_linear = zero;
(simd_type &)internalForces[i + 1].m_angular = zero;
(simd_type &)internalForces[i + 2].m_linear = zero;
(simd_type &)internalForces[i + 2].m_angular = zero;
(simd_type &)internalForces[i + 3].m_linear = zero;
(simd_type &)internalForces[i + 3].m_angular = zero;
}
for (dgInt32 i = roundBodyCount; i < m_bodyCount; i++) {
(simd_type &)internalForces[i].m_linear = zero;
(simd_type &)internalForces[i].m_angular = zero;
}
for (dgInt32 i = 0; i < m_jointCount; i++) {
dgInt32 j;
dgInt32 m0;
dgInt32 m1;
dgInt32 first;
dgInt32 count;
dgInt32 index;
// dgFloat32 ak;
simd_type y0_linear;
simd_type y0_angular;
simd_type y1_linear;
simd_type y1_angular;
first = constraintArray[i].m_autoPairstart;
count = constraintArray[i].m_autoPairActiveCount;
m0 = constraintArray[i].m_m0;
m1 = constraintArray[i].m_m1;
// dgJacobian y0 (internalForces[m0]);
// dgJacobian y1 (internalForces[m1]);
y0_linear = zero;
y0_angular = zero;
y1_linear = zero;
y1_angular = zero;
for (j = 0; j < count; j++) {
simd_type tmp0;
index = j + first;
// ak = deltaForce[index];
tmp0 = simd_set1(deltaForce[index]);
// y0.m_linear += Jt[index].m_jacobian_IM0.m_linear.Scale (ak);
// y0.m_angular += Jt[index].m_jacobian_IM0.m_angular.Scale (ak);
// y1.m_linear += Jt[index].m_jacobian_IM1.m_linear.Scale (ak);
// y1.m_angular += Jt[index].m_jacobian_IM1.m_angular.Scale (ak);
y0_linear =
simd_mul_add_v(y0_linear, (simd_type &)Jt[index].m_jacobian_IM0.m_linear, tmp0);
y0_angular =
simd_mul_add_v(y0_angular, (simd_type &)Jt[index].m_jacobian_IM0.m_angular, tmp0);
y1_linear =
simd_mul_add_v(y1_linear, (simd_type &)Jt[index].m_jacobian_IM1.m_linear, tmp0);
y1_angular =
simd_mul_add_v(y1_angular, (simd_type &)Jt[index].m_jacobian_IM1.m_angular, tmp0);
}
// internalForces[m0] = y0;
// internalForces[m1] = y1;
(simd_type &)internalForces[m0].m_linear =
simd_add_v((simd_type &)internalForces[m0].m_linear, y0_linear);
(simd_type &)internalForces[m0].m_angular =
simd_add_v((simd_type &)internalForces[m0].m_angular, y0_angular);
(simd_type &)internalForces[m1].m_linear =
simd_add_v((simd_type &)internalForces[m1].m_linear, y1_linear);
(simd_type &)internalForces[m1].m_angular =
simd_add_v((simd_type &)internalForces[m1].m_angular, y1_angular);
}
akDenSimd = zero;
for (dgInt32 i = 0; i < m_jointCount; i++) {
dgInt32 j;
dgInt32 m0;
dgInt32 m1;
dgInt32 first;
dgInt32 count;
dgInt32 index;
simd_type y0_linear;
simd_type y0_angular;
simd_type y1_linear;
simd_type y1_angular;
first = constraintArray[i].m_autoPairstart;
count = constraintArray[i].m_autoPairActiveCount;
m0 = constraintArray[i].m_m0;
m1 = constraintArray[i].m_m1;
// const dgJacobian& y0 = internalForces[k0];
// const dgJacobian& y1 = internalForces[k1];
y0_linear = (simd_type &)internalForces[m0].m_linear;
y0_angular = (simd_type &)internalForces[m0].m_angular;
y1_linear = (simd_type &)internalForces[m1].m_linear;
y1_angular = (simd_type &)internalForces[m1].m_angular;
for (j = 0; j < count; j++) {
simd_type tmp1;
simd_type tmp2;
index = j + first;
// dgVector tmpAccel (JMinv[index].m_jacobian_IM0.m_linear.CompProduct(y0.m_linear));
// tmpAccel += JMinv[index].m_jacobian_IM0.m_angular.CompProduct(y0.m_angular);
// tmpAccel += JMinv[index].m_jacobian_IM1.m_linear.CompProduct(y1.m_linear);
// tmpAccel += JMinv[index].m_jacobian_IM1.m_angular.CompProduct(y1.m_angular);
tmp2 =
simd_mul_v((simd_type &)JMinv[index].m_jacobian_IM0.m_linear, y0_linear);
tmp2 =
simd_mul_add_v(tmp2, (simd_type &)JMinv[index].m_jacobian_IM0.m_angular, y0_angular);
tmp2 =
simd_mul_add_v(tmp2, (simd_type &)JMinv[index].m_jacobian_IM1.m_linear, y1_linear);
tmp2 =
simd_mul_add_v(tmp2, (simd_type &)JMinv[index].m_jacobian_IM1.m_angular, y1_angular);
// deltaAccel[index] = tmpAccel.m_x + tmpAccel.m_y + tmpAccel.m_z + deltaForce[index] * diagDamp[index];
tmp1 = simd_load_s(deltaForce[index]);
tmp2 = simd_add_v(tmp2, simd_move_hl_v(tmp2, tmp2));
tmp2 =
simd_add_s(tmp2, simd_permut_v(tmp2, tmp2, PURMUT_MASK(3, 3, 3, 1)));
tmp2 = simd_mul_add_s(tmp2, tmp1, simd_load_s(diagDamp[index]));
simd_store_s(tmp2, &deltaAccel[index]);
// akDen += deltaAccel[index] * deltaForce[index];
akDenSimd = simd_mul_add_s(akDenSimd, tmp2, tmp1);
}
}
// NEWTON_ASSERT (akDen > dgFloat32 (0.0f));
// akDen = GetMax (akDen, dgFloat32(1.0e-16f));
// NEWTON_ASSERT (dgAbsf (akDen) >= dgFloat32(1.0e-16f));
// ak = akNum / akDen;
akSimd =
simd_div_s(simd_load_s(akNum), simd_max_s(akDenSimd, tol_pos_1eNeg16));
clampedForceIndex = -1;
clampedForceJoint = -1;
clampedForceIndexValue = zero;
for (dgInt32 i = 0; i < m_jointCount; i++) {
dgInt32 j;
dgInt32 first;
dgInt32 count;
dgInt32 index;
if (simd_store_is(simd_cmpgt_s(akSimd, tol_pos_1eNeg8))) {
first = constraintArray[i].m_autoPairstart;
count = constraintArray[i].m_autoPairActiveCount;
for (j = 0; j < count; j++) {
simd_type tmp1;
simd_type test0;
simd_type test1;
index = j + first;
// tmp = force[index] + ak * deltaForce[index];
simd_type force_index;
simd_type delta_force;
simd_type low_force;
simd_type high_force;
// Make sure AK is not negative
force_index = simd_set1(force[index]);
delta_force = simd_set1(deltaForce[index]);
low_force = simd_set1(lowerForceBound[index]);
high_force = simd_set1(upperForceBound[index]);
tmp1 = simd_mul_add_s(force_index, akSimd, delta_force);
// test0 = (deltaForce[index] < dgFloat32 (-1.0e-16f)) & (tmp < lowerForceBound[index]);
test0 =
simd_and_v(simd_cmplt_s(delta_force, tol_neg_1eNeg16), simd_cmplt_s(tmp1, low_force));
// test1 = (deltaForce[index] > dgFloat32 ( 1.0e-16f)) & (tmp > upperForceBound[index]);
test1 =
simd_and_v(simd_cmpgt_s(delta_force, tol_pos_1eNeg16), simd_cmpgt_s(tmp1, high_force));
// test = test0 | test1;
if (simd_store_is(simd_or_v(test0, test1))) {
// clampedForceIndexValue = lowerForceBound[index];
clampedForceIndexValue =
simd_or_v(simd_and_v(low_force, test0), simd_and_v(high_force, test1));
akSimd =
simd_div_s(simd_sub_s(clampedForceIndexValue, force_index), delta_force);
// clampedForceIndex = test ? j : clampedForceIndex;
// clampedForceJoint = test ? i : clampedForceJoint;
clampedForceIndex = j;
clampedForceJoint = i;
}
}
}
}
akSimd = simd_permut_v(akSimd, akSimd, PURMUT_MASK(0, 0, 0, 0));
if (clampedForceIndex >= 0) {
dgInt32 first;
dgInt32 count;
dgInt32 activeCount;
simd_type tmp0;
simd_type tmp1;
bool isClamped[DG_CONSTRAINT_MAX_ROWS];
for (dgInt32 i = 0; i < m_jointCount; i++) {
dgInt32 j;
dgInt32 first;
dgInt32 count;
dgInt32 index;
first = constraintArray[i].m_autoPairstart;
count = constraintArray[i].m_autoPairActiveCount;
for (j = 0; j < count; j += DG_SIMD_WORD_SIZE) {
index = j + first;
// force[index] += ak * deltaForce[index];
// accel[index] -= ak * deltaAccel[index];
(simd_type &)force[index] =
simd_mul_add_v((simd_type &)force[index], akSimd, (simd_type &)deltaForce[index]);
(simd_type &)accel[index] =
simd_mul_sub_v((simd_type &)accel[index], akSimd, (simd_type &)deltaAccel[index]);
}
}
first = constraintArray[clampedForceJoint].m_autoPairstart;
count = constraintArray[clampedForceJoint].m_autoPairActiveCount;
count--;
// force[first + clampedForceIndex] = clampedForceIndexValue;
simd_store_s(clampedForceIndexValue, &force[first + clampedForceIndex]);
accel[first + clampedForceIndex] = dgFloat32(0.0f);
deltaForce[first + clampedForceIndex] = dgFloat32(0.0f);
deltaAccel[first + clampedForceIndex] = dgFloat32(0.0f);
if (clampedForceIndex != count) {
SwapRowsSimd(first + clampedForceIndex, first + count);
}
activeCount = count;
for (dgInt32 i = 0; i < count; i++) {
dgInt32 index;
dgFloat32 val;
index = first + i;
isClamped[i] = false;
val = lowerForceBound[index] - force[index];
if ((val > dgFloat32(-1.0e-5f)) && (accel[index] < dgFloat32(0.0f))) {
activeCount--;
isClamped[i] = true;
accel[index] = dgFloat32(0.0f);
deltaForce[index] = dgFloat32(0.0f);
deltaAccel[index] = dgFloat32(0.0f);
} else {
val = upperForceBound[index] - force[index];
if ((val < dgFloat32(1.0e-5f)) && (accel[index] > dgFloat32(0.0f))) {
activeCount--;
isClamped[i] = true;
accel[index] = dgFloat32(0.0f);
deltaForce[index] = dgFloat32(0.0f);
deltaAccel[index] = dgFloat32(0.0f);
}
}
}
if (activeCount < count) {
dgInt32 i0;
dgInt32 i1;
i0 = 0;
i1 = count - 1;
do {
while ((i0 <= i1) && !isClamped[i0])
i0++;
while ((i0 <= i1) && isClamped[i1])
i1--;
if (i0 < i1) {
SwapRowsSimd(first + i0, first + i1);
i0++;
i1--;
}
} while (i0 < i1);
}
constraintArray[clampedForceJoint].m_autoPairActiveCount = activeCount;
forceRows = 0;
// akNum = dgFloat32 (0.0f);
// accNorm = dgFloat32(0.0f);
tmp0 = zero;
tmp1 = zero;
for (dgInt32 i = 0; i < m_jointCount; i++) {
dgInt32 j;
dgInt32 first;
dgInt32 count;
dgInt32 index;
first = constraintArray[i].m_autoPairstart;
count = constraintArray[i].m_autoPairActiveCount;
forceRows += count;
// for (j = 0; j < count; j ++) {
for (j = 0; j < count; j += DG_SIMD_WORD_SIZE) {
index = first + j;
//NEWTON_ASSERT ((i != clampedForceJoint) || !((dgAbsf (lowerForceBound[index] - force[index]) < dgFloat32 (1.0e-5f)) && (accel[index] < dgFloat32 (0.0f))));
//NEWTON_ASSERT ((i != clampedForceJoint) || !((dgAbsf (upperForceBound[index] - force[index]) < dgFloat32 (1.0e-5f)) && (accel[index] > dgFloat32 (0.0f))));
// deltaForce[index] = accel[index] * invDJMinvJt[index];
(simd_type &)deltaForce[index] =
simd_mul_v((simd_type &)accel[index], (simd_type &)invDJMinvJt[index]);
// akNum += deltaForce[index] * accel[index];
tmp0 =
simd_mul_add_v(tmp0, (simd_type &)accel[index], (simd_type &)deltaForce[index]);
// accNorm = GetMax (dgAbsf (accel[index]), accNorm);
tmp1 =
simd_max_v(tmp1, simd_and_v((simd_type &)accel[index], signMask));
//NEWTON_ASSERT (dgCheckFloat(deltaForce[index]));
}
}
tmp0 = simd_add_v(tmp0, simd_move_hl_v(tmp0, tmp0));
tmp0 =
simd_add_s(tmp0, simd_permut_v(tmp0, tmp0, PURMUT_MASK(0, 0, 0, 1)));
simd_store_s(tmp0, &akNum);
tmp1 = simd_max_v(tmp1, simd_move_hl_v(tmp1, tmp1));
tmp1 =
simd_max_s(tmp1, simd_permut_v(tmp1, tmp1, PURMUT_MASK(0, 0, 0, 1)));
simd_store_s(tmp1, &accNorm);
NEWTON_ASSERT(akNum >= dgFloat32(0.0f));
passes = -1;
maxPasses = forceRows;
} else {
simd_type tmp1;
// accNorm = dgFloat32(0.0f);
tmp1 = zero;
for (dgInt32 i = 0; i < m_jointCount; i++) {
dgInt32 first;
dgInt32 count;
dgInt32 index;
first = constraintArray[i].m_autoPairstart;
count = constraintArray[i].m_autoPairActiveCount;
for (dgInt32 j = 0; j < count; j += DG_SIMD_WORD_SIZE) {
index = j + first;
// force[index] += ak * deltaForce[index];
// accel[index] -= ak * deltaAccel[index];
(simd_type &)force[index] =
simd_mul_add_v((simd_type &)force[index], akSimd, (simd_type &)deltaForce[index]);
(simd_type &)accel[index] =
simd_mul_sub_v((simd_type &)accel[index], akSimd, (simd_type &)deltaAccel[index]);
// accNorm = GetMax (dgAbsf (accel[index]), accNorm);
tmp1 =
simd_max_v(tmp1, simd_and_v((simd_type &)accel[index], signMask));
}
}
tmp1 = simd_max_v(tmp1, simd_move_hl_v(tmp1, tmp1));
tmp1 =
simd_max_s(tmp1, simd_permut_v(tmp1, tmp1, PURMUT_MASK(0, 0, 0, 1)));
simd_store_s(tmp1, &accNorm);
if (accNorm > maxAccNorm) {
simd_type tmp0;
// simd_type akNum;
// akDen = akNum;
// akNum = dgFloat32(0.0f);
tmp0 = zero;
akDenSimd = simd_set1(akNum);
for (dgInt32 i = 0; i < m_jointCount; i++) {
dgInt32 first;
dgInt32 count;
dgInt32 index;
first = constraintArray[i].m_autoPairstart;
count = constraintArray[i].m_autoPairActiveCount;
for (dgInt32 j = 0; j < count; j += DG_SIMD_WORD_SIZE) {
index = j + first;
// deltaAccel[index] = accel[index] * invDJMinvJt[index];
(simd_type &)deltaAccel[index] =
simd_mul_v((simd_type &)accel[index], (simd_type &)invDJMinvJt[index]);
// akNum += accel[index] * deltaAccel[index];
tmp0 =
simd_mul_add_v(tmp0, (simd_type &)accel[index], (simd_type &)deltaAccel[index]);
}
}
tmp0 = simd_add_v(tmp0, simd_move_hl_v(tmp0, tmp0));
tmp0 =
simd_add_s(tmp0, simd_permut_v(tmp0, tmp0, PURMUT_MASK(0, 0, 0, 1)));
simd_store_s(tmp0, &akNum);
// NEWTON_ASSERT (akNum >= dgFloat32 (0.0f));
// NEWTON_ASSERT (akDen > dgFloat32(0.0f));
// akDen = GetMax (akDen, dgFloat32 (1.0e-17f));
// ak = dgFloat32 (akNum / akDen);
tmp0 =
simd_div_s(tmp0, simd_max_s(akDenSimd, simd_set1(dgFloat32(1.0e-17f))));
tmp0 = simd_permut_v(tmp0, tmp0, PURMUT_MASK(0, 0, 0, 0));
for (dgInt32 i = 0; i < m_jointCount; i++) {
dgInt32 first;
dgInt32 count;
dgInt32 index;
first = constraintArray[i].m_autoPairstart;
count = constraintArray[i].m_autoPairActiveCount;
for (dgInt32 j = 0; j < count; j += DG_SIMD_WORD_SIZE) {
index = j + first;
// deltaForce[index] = deltaAccel[index] + ak * deltaForce[index];
(simd_type &)deltaForce[index] =
simd_mul_add_v((simd_type &)deltaAccel[index], tmp0, (simd_type &)deltaForce[index]);
}
}
}
}
totalPassesCount++;
}
ApplyExternalForcesAndAccelerationSimd(maxAccNorm);
#endif
}
void dgJacobianMemory::CalculateForcesSimulationMode(dgFloat32 maxAccNorm) const {
dgInt32 passes;
dgInt32 prevJoint;
dgInt32 maxPasses;
dgInt32 forceRows;
//dgInt32 totalPassesCount;
dgFloat32 akNum;
dgFloat32 accNorm;
dgFloat32 *const force = m_force;
dgFloat32 *const accel = m_accel;
const dgJacobianPair *const Jt = m_Jt;
dgFloat32 *const deltaAccel = m_deltaAccel;
dgFloat32 *const deltaForce = m_deltaForce;
const dgJacobianPair *const JMinv = m_JMinv;
const dgFloat32 *const diagDamp = m_diagDamp;
const dgFloat32 *const invDJMinvJt = m_invDJMinvJt;
dgJacobian *const internalForces = m_internalForces;
dgInt32 *const normalForceIndex = m_normalForceIndex;
dgJointInfo *const constraintArray = m_constraintArray;
const dgFloat32 *const coordenateAccel = m_coordenateAccel;
dgFloat32 *const lowerForceBound = m_lowerBoundFrictionCoefficent;
dgFloat32 *const upperForceBound = m_upperBoundFrictionCoefficent;
dgFloat32 forceStep[DG_CONSTRAINT_MAX_ROWS];
dgVector zero(dgFloat32(0.0f), dgFloat32(0.0f), dgFloat32(0.0f),
dgFloat32(0.0f));
// initialize the intermediate force accumulation to zero
for (dgInt32 i = 0; i < m_bodyCount; i++) {
internalForces[i].m_linear = zero;
internalForces[i].m_angular = zero;
}
for (dgInt32 i = 0; i < m_jointCount; i++) {
dgInt32 m0;
dgInt32 m1;
dgInt32 index;
dgInt32 first;
dgInt32 count;
dgFloat32 val;
dgJacobian y0;
dgJacobian y1;
first = constraintArray[i].m_autoPairstart;
count = constraintArray[i].m_autoPairActiveCount;
m0 = constraintArray[i].m_m0;
m1 = constraintArray[i].m_m1;
y0.m_linear = zero;
y0.m_angular = zero;
y1.m_linear = zero;
y1.m_angular = zero;
for (dgInt32 j = 0; j < count; j++) {
index = first + j;
val = force[index];
y0.m_linear += Jt[index].m_jacobian_IM0.m_linear.Scale(val);
y0.m_angular += Jt[index].m_jacobian_IM0.m_angular.Scale(val);
y1.m_linear += Jt[index].m_jacobian_IM1.m_linear.Scale(val);
y1.m_angular += Jt[index].m_jacobian_IM1.m_angular.Scale(val);
}
internalForces[m0].m_linear += y0.m_linear;
internalForces[m0].m_angular += y0.m_angular;
internalForces[m1].m_linear += y1.m_linear;
internalForces[m1].m_angular += y1.m_angular;
}
for (dgInt32 i = 0; i < DG_CONSTRAINT_MAX_ROWS; i++) {
forceStep[i] = dgFloat32(0.0f);
}
maxPasses = 4;
prevJoint = 0;
accNorm = maxAccNorm * dgFloat32(2.0f);
for (passes = 0; (passes < maxPasses) && (accNorm > maxAccNorm); passes++) {
accNorm = dgFloat32(0.0f);
for (dgInt32 currJoint = 0; currJoint < m_jointCount; currJoint++) {
dgInt32 m0;
dgInt32 m1;
dgInt32 index;
dgInt32 rowsCount;
dgFloat32 jointAccel;
dgJacobian y0;
dgJacobian y1;
index = constraintArray[prevJoint].m_autoPairstart;
rowsCount = constraintArray[prevJoint].m_autoPaircount;
m0 = constraintArray[prevJoint].m_m0;
m1 = constraintArray[prevJoint].m_m1;
y0.m_linear = zero;
y0.m_angular = zero;
y1.m_linear = zero;
y1.m_angular = zero;
for (dgInt32 i = 0; i < rowsCount; i++) {
dgFloat32 deltaForceI;
deltaForceI = forceStep[i];
y0.m_linear += Jt[index].m_jacobian_IM0.m_linear.Scale(deltaForceI);
y0.m_angular += Jt[index].m_jacobian_IM0.m_angular.Scale(deltaForceI);
y1.m_linear += Jt[index].m_jacobian_IM1.m_linear.Scale(deltaForceI);
y1.m_angular += Jt[index].m_jacobian_IM1.m_angular.Scale(deltaForceI);
index++;
}
internalForces[m0].m_linear += y0.m_linear;
internalForces[m0].m_angular += y0.m_angular;
internalForces[m1].m_linear += y1.m_linear;
internalForces[m1].m_angular += y1.m_angular;
index = constraintArray[currJoint].m_autoPairstart;
rowsCount = constraintArray[currJoint].m_autoPaircount;
m0 = constraintArray[currJoint].m_m0;
m1 = constraintArray[currJoint].m_m1;
y0 = internalForces[m0];
y1 = internalForces[m1];
for (dgInt32 i = 0; i < rowsCount; i++) {
dgVector acc(
JMinv[index].m_jacobian_IM0.m_linear.CompProduct(y0.m_linear));
acc += JMinv[index].m_jacobian_IM0.m_angular.CompProduct(y0.m_angular);
acc += JMinv[index].m_jacobian_IM1.m_linear.CompProduct(y1.m_linear);
acc += JMinv[index].m_jacobian_IM1.m_angular.CompProduct(y1.m_angular);
accel[i] = coordenateAccel[index] - acc.m_x - acc.m_y - acc.m_z - force[index] * diagDamp[index];
index++;
}
jointAccel = CalculateJointForces(currJoint, forceStep, maxAccNorm);
accNorm = GetMax(accNorm, jointAccel);
prevJoint = currJoint;
}
}
for (dgInt32 i = 0; i < m_jointCount; i++) {
dgInt32 j;
dgInt32 k;
dgInt32 first;
dgInt32 count;
dgInt32 index;
dgFloat32 val;
first = constraintArray[i].m_autoPairstart;
count = constraintArray[i].m_autoPaircount;
constraintArray[i].m_autoPaircount = count;
for (k = 0; k < count; k++) {
index = first + k;
j = normalForceIndex[index];
NEWTON_ASSERT(((j < 0) && (force[j] == dgFloat32(1.0f))) || ((j >= 0) && (force[j] >= dgFloat32(0.0f))));
val = force[j];
lowerForceBound[index] *= val;
upperForceBound[index] *= val;
val = force[index];
force[index] = ClampValue(val, lowerForceBound[index], upperForceBound[index]);
}
}
for (dgInt32 i = 0; i < m_bodyCount; i++) {
internalForces[i].m_linear[0] = dgFloat32(0.0f);
internalForces[i].m_linear[1] = dgFloat32(0.0f);
internalForces[i].m_linear[2] = dgFloat32(0.0f);
internalForces[i].m_linear[3] = dgFloat32(0.0f);
internalForces[i].m_angular[0] = dgFloat32(0.0f);
internalForces[i].m_angular[1] = dgFloat32(0.0f);
internalForces[i].m_angular[2] = dgFloat32(0.0f);
internalForces[i].m_angular[3] = dgFloat32(0.0f);
}
for (dgInt32 i = 0; i < m_jointCount; i++) {
dgInt32 j;
dgInt32 m0;
dgInt32 m1;
dgInt32 first;
dgInt32 count;
dgInt32 index;
dgJacobian y0;
dgJacobian y1;
first = constraintArray[i].m_autoPairstart;
count = constraintArray[i].m_autoPairActiveCount;
m0 = constraintArray[i].m_m0;
m1 = constraintArray[i].m_m1;
y0.m_linear = zero;
y0.m_angular = zero;
y1.m_linear = zero;
y1.m_angular = zero;
for (j = 0; j < count; j++) {
dgFloat32 val;
index = j + first;
val = force[index];
y0.m_linear += Jt[index].m_jacobian_IM0.m_linear.Scale(val);
y0.m_angular += Jt[index].m_jacobian_IM0.m_angular.Scale(val);
y1.m_linear += Jt[index].m_jacobian_IM1.m_linear.Scale(val);
y1.m_angular += Jt[index].m_jacobian_IM1.m_angular.Scale(val);
}
internalForces[m0].m_linear += y0.m_linear;
internalForces[m0].m_angular += y0.m_angular;
internalForces[m1].m_linear += y1.m_linear;
internalForces[m1].m_angular += y1.m_angular;
}
forceRows = 0;
akNum = dgFloat32(0.0f);
accNorm = dgFloat32(0.0f);
for (dgInt32 i = 0; i < m_jointCount; i++) {
dgInt32 m0;
dgInt32 m1;
dgInt32 first;
dgInt32 count;
dgInt32 index;
dgInt32 activeCount;
bool isClamped[DG_CONSTRAINT_MAX_ROWS];
first = constraintArray[i].m_autoPairstart;
count = constraintArray[i].m_autoPairActiveCount;
m0 = constraintArray[i].m_m0;
m1 = constraintArray[i].m_m1;
const dgJacobian &y0 = internalForces[m0];
const dgJacobian &y1 = internalForces[m1];
for (dgInt32 j = 0; j < count; j++) {
index = j + first;
dgVector tmpAccel(JMinv[index].m_jacobian_IM0.m_linear.CompProduct(y0.m_linear));
tmpAccel += JMinv[index].m_jacobian_IM0.m_angular.CompProduct(y0.m_angular);
tmpAccel += JMinv[index].m_jacobian_IM1.m_linear.CompProduct(y1.m_linear);
tmpAccel += JMinv[index].m_jacobian_IM1.m_angular.CompProduct(y1.m_angular);
accel[index] = coordenateAccel[index] - (tmpAccel.m_x + tmpAccel.m_y + tmpAccel.m_z + force[index] * diagDamp[index]);
}
activeCount = 0;
for (dgInt32 j = 0; j < count; j++) {
dgFloat32 val;
index = j + first;
val = lowerForceBound[index] - force[index];
if ((dgAbsf(val) < dgFloat32(1.0e-5f)) && (accel[index] < dgFloat32(0.0f))) {
force[index] = lowerForceBound[index];
isClamped[j] = true;
} else {
val = upperForceBound[index] - force[index];
if ((dgAbsf(val) < dgFloat32(1.0e-5f)) && (accel[index] > dgFloat32(0.0f))) {
force[index] = upperForceBound[index];
isClamped[j] = true;
} else {
forceRows++;
activeCount++;
deltaForce[index] = accel[index] * invDJMinvJt[index];
akNum += accel[index] * deltaForce[index];
accNorm = GetMax(dgAbsf(accel[index]), accNorm);
isClamped[j] = false;
}
}
}
if (activeCount < count) {
dgInt32 i0;
dgInt32 i1;
i0 = 0;
i1 = count - 1;
constraintArray[i].m_autoPairActiveCount = activeCount;
do {
while ((i0 <= i1) && !isClamped[i0])
i0++;
while ((i0 <= i1) && isClamped[i1])
i1--;
if (i0 < i1) {
SwapRows(first + i0, first + i1);
i0++;
i1--;
}
} while (i0 < i1);
}
}
maxPasses = forceRows;
//totalPassesCount = 0;
for (passes = 0; (passes < maxPasses) && (accNorm > maxAccNorm); passes++) {
dgInt32 clampedForceIndex;
dgInt32 clampedForceJoint;
dgFloat32 ak;
dgFloat32 akDen;
dgFloat32 clampedForceIndexValue;
for (dgInt32 i = 0; i < m_bodyCount; i++) {
internalForces[i].m_linear[0] = dgFloat32(0.0f);
internalForces[i].m_linear[1] = dgFloat32(0.0f);
internalForces[i].m_linear[2] = dgFloat32(0.0f);
internalForces[i].m_linear[3] = dgFloat32(0.0f);
internalForces[i].m_angular[0] = dgFloat32(0.0f);
internalForces[i].m_angular[1] = dgFloat32(0.0f);
internalForces[i].m_angular[2] = dgFloat32(0.0f);
internalForces[i].m_angular[3] = dgFloat32(0.0f);
}
for (dgInt32 i = 0; i < m_jointCount; i++) {
dgJacobian y0;
dgJacobian y1;
dgInt32 jfirst = constraintArray[i].m_autoPairstart;
dgInt32 jcount = constraintArray[i].m_autoPairActiveCount;
dgInt32 m0 = constraintArray[i].m_m0;
dgInt32 m1 = constraintArray[i].m_m1;
y0.m_linear = zero;
y0.m_angular = zero;
y1.m_linear = zero;
y1.m_angular = zero;
for (dgInt32 j = 0; j < jcount; j++) {
dgInt32 index = j + jfirst;
dgFloat32 jak = deltaForce[index];
y0.m_linear += Jt[index].m_jacobian_IM0.m_linear.Scale(jak);
y0.m_angular += Jt[index].m_jacobian_IM0.m_angular.Scale(jak);
y1.m_linear += Jt[index].m_jacobian_IM1.m_linear.Scale(jak);
y1.m_angular += Jt[index].m_jacobian_IM1.m_angular.Scale(jak);
}
internalForces[m0].m_linear += y0.m_linear;
internalForces[m0].m_angular += y0.m_angular;
internalForces[m1].m_linear += y1.m_linear;
internalForces[m1].m_angular += y1.m_angular;
}
akDen = dgFloat32(0.0f);
for (dgInt32 i = 0; i < m_jointCount; i++) {
dgInt32 j;
dgInt32 m0;
dgInt32 m1;
dgInt32 first;
dgInt32 count;
dgInt32 index;
first = constraintArray[i].m_autoPairstart;
count = constraintArray[i].m_autoPairActiveCount;
m0 = constraintArray[i].m_m0;
m1 = constraintArray[i].m_m1;
const dgJacobian &y0 = internalForces[m0];
const dgJacobian &y1 = internalForces[m1];
for (j = 0; j < count; j++) {
index = j + first;
dgVector tmpAccel(JMinv[index].m_jacobian_IM0.m_linear.CompProduct(y0.m_linear));
tmpAccel += JMinv[index].m_jacobian_IM0.m_angular.CompProduct(y0.m_angular);
tmpAccel += JMinv[index].m_jacobian_IM1.m_linear.CompProduct(y1.m_linear);
tmpAccel += JMinv[index].m_jacobian_IM1.m_angular.CompProduct(y1.m_angular);
deltaAccel[index] = tmpAccel.m_x + tmpAccel.m_y + tmpAccel.m_z + deltaForce[index] * diagDamp[index];
akDen += deltaAccel[index] * deltaForce[index];
}
}
NEWTON_ASSERT(akDen > dgFloat32(0.0f));
akDen = GetMax(akDen, dgFloat32(1.0e-16f));
NEWTON_ASSERT(dgAbsf(akDen) >= dgFloat32(1.0e-16f));
ak = akNum / akDen;
clampedForceIndex = -1;
clampedForceJoint = -1;
clampedForceIndexValue = dgFloat32(0.0f);
for (dgInt32 i = 0; i < m_jointCount; i++) {
dgInt32 j;
dgInt32 first;
dgInt32 count;
dgInt32 index;
dgFloat32 val;
if (ak > dgFloat32(1.0e-8f)) {
first = constraintArray[i].m_autoPairstart;
count = constraintArray[i].m_autoPairActiveCount;
for (j = 0; j < count; j++) {
index = j + first;
val = force[index] + ak * deltaForce[index];
if (deltaForce[index] < dgFloat32(-1.0e-16f)) {
// if (val < bilateralForceBounds[index].m_low) {
if (val < lowerForceBound[index]) {
ak = GetMax((lowerForceBound[index] - force[index]) / deltaForce[index], dgFloat32(0.0f));
NEWTON_ASSERT(ak >= dgFloat32(0.0f));
clampedForceIndex = j;
clampedForceJoint = i;
// clampedForceIndexValue = bilateralForceBounds[index].m_low;
clampedForceIndexValue = lowerForceBound[index];
}
} else if (deltaForce[index] > dgFloat32(1.0e-16f)) {
// if (val > bilateralForceBounds[index].m_upper) {
if (val > upperForceBound[index]) {
ak = GetMax((upperForceBound[index] - force[index]) / deltaForce[index], dgFloat32(0.0f));
NEWTON_ASSERT(ak >= dgFloat32(0.0f));
clampedForceIndex = j;
clampedForceJoint = i;
clampedForceIndexValue = upperForceBound[index];
}
}
}
}
}
if (clampedForceIndex >= 0) {
dgInt32 first;
dgInt32 count;
dgInt32 activeCount;
bool isClamped[DG_CONSTRAINT_MAX_ROWS];
for (dgInt32 i = 0; i < m_jointCount; i++) {
dgInt32 jfirst = constraintArray[i].m_autoPairstart;
dgInt32 jcount = constraintArray[i].m_autoPairActiveCount;
for (dgInt32 j = 0; j < jcount; j++) {
dgInt32 jindex = j + jfirst;
force[jindex] += ak * deltaForce[jindex];
accel[jindex] -= ak * deltaAccel[jindex];
}
}
first = constraintArray[clampedForceJoint].m_autoPairstart;
count = constraintArray[clampedForceJoint].m_autoPairActiveCount;
count--;
force[first + clampedForceIndex] = clampedForceIndexValue;
if (clampedForceIndex != count) {
SwapRows(first + clampedForceIndex, first + count);
}
activeCount = count;
for (dgInt32 i = 0; i < count; i++) {
dgInt32 index;
dgFloat32 val;
index = first + i;
isClamped[i] = false;
val = lowerForceBound[index] - force[index];
if ((val > dgFloat32(-1.0e-5f)) && (accel[index] < dgFloat32(0.0f))) {
activeCount--;
isClamped[i] = true;
} else {
val = upperForceBound[index] - force[index];
if ((val < dgFloat32(1.0e-5f)) && (accel[index] > dgFloat32(0.0f))) {
activeCount--;
isClamped[i] = true;
}
}
}
if (activeCount < count) {
dgInt32 i0;
dgInt32 i1;
i0 = 0;
i1 = count - 1;
do {
while ((i0 <= i1) && !isClamped[i0])
i0++;
while ((i0 <= i1) && isClamped[i1])
i1--;
if (i0 < i1) {
SwapRows(first + i0, first + i1);
// Swap (isClamped[i0], isClamped[i1]);
// Swap (permutationIndex[i0], permutationIndex[i1]);
i0++;
i1--;
}
} while (i0 < i1);
}
constraintArray[clampedForceJoint].m_autoPairActiveCount = activeCount;
forceRows = 0;
akNum = dgFloat32(0.0f);
accNorm = dgFloat32(0.0f);
for (dgInt32 i = 0; i < m_jointCount; i++) {
dgInt32 jfirst = constraintArray[i].m_autoPairstart;
dgInt32 jcount = constraintArray[i].m_autoPairActiveCount;
forceRows += jcount;
for (dgInt32 j = 0; j < jcount; j++) {
dgInt32 jindex = jfirst + j;
NEWTON_ASSERT((i != clampedForceJoint) || !((dgAbsf(lowerForceBound[jindex] - force[jindex]) < dgFloat32(1.0e-5f)) && (accel[jindex] < dgFloat32(0.0f))));
NEWTON_ASSERT((i != clampedForceJoint) || !((dgAbsf(upperForceBound[jindex] - force[jindex]) < dgFloat32(1.0e-5f)) && (accel[jindex] > dgFloat32(0.0f))));
deltaForce[jindex] = accel[jindex] * invDJMinvJt[jindex];
akNum += deltaForce[jindex] * accel[jindex];
accNorm = GetMax(dgAbsf(accel[jindex]), accNorm);
NEWTON_ASSERT(dgCheckFloat(deltaForce[jindex]));
}
}
NEWTON_ASSERT(akNum >= dgFloat32(0.0f));
passes = -1;
maxPasses = forceRows;
} else {
accNorm = dgFloat32(0.0f);
for (dgInt32 i = 0; i < m_jointCount; i++) {
dgInt32 j;
dgInt32 first;
dgInt32 count;
dgInt32 index;
first = constraintArray[i].m_autoPairstart;
count = constraintArray[i].m_autoPairActiveCount;
for (j = 0; j < count; j++) {
index = j + first;
force[index] += ak * deltaForce[index];
accel[index] -= ak * deltaAccel[index];
accNorm = GetMax(dgAbsf(accel[index]), accNorm);
}
}
if (accNorm > maxAccNorm) {
akDen = akNum;
akNum = dgFloat32(0.0f);
for (dgInt32 i = 0; i < m_jointCount; i++) {
dgInt32 j;
dgInt32 first;
dgInt32 count;
dgInt32 index;
first = constraintArray[i].m_autoPairstart;
count = constraintArray[i].m_autoPairActiveCount;
for (j = 0; j < count; j++) {
index = j + first;
deltaAccel[index] = accel[index] * invDJMinvJt[index];
akNum += accel[index] * deltaAccel[index];
}
}
NEWTON_ASSERT(akNum >= dgFloat32(0.0f));
NEWTON_ASSERT(akDen > dgFloat32(0.0f));
akDen = GetMax(akDen, dgFloat32(1.0e-17f));
ak = dgFloat32(akNum / akDen);
for (dgInt32 i = 0; i < m_jointCount; i++) {
dgInt32 j;
dgInt32 first;
dgInt32 count;
dgInt32 index;
first = constraintArray[i].m_autoPairstart;
count = constraintArray[i].m_autoPairActiveCount;
for (j = 0; j < count; j++) {
index = j + first;
deltaForce[index] = deltaAccel[index] + ak * deltaForce[index];
}
}
}
}
//totalPassesCount++;
}
ApplyExternalForcesAndAcceleration(maxAccNorm);
}
dgFloat32 dgJacobianMemory::CalculateJointForcesSimd(dgInt32 joint,
dgFloat32 *forceStep, dgFloat32 maxAccNorm) const {
#ifdef DG_BUILD_SIMD_CODE
dgInt32 m0;
dgInt32 m1;
dgInt32 first;
dgInt32 count;
dgInt32 roundCount;
dgInt32 maxPasses;
dgInt32 clampedForceIndex;
dgFloat32 ak;
dgFloat32 akNum;
dgFloat32 retAccel;
dgFloatSign tmpIndex;
// dgFloat32 akDen;
// dgFloat32 force;
dgFloat32 accNorm;
// dgFloat32 retAccNorm;
dgFloat32 clampedForceIndexValue;
// dgJacobian y0;
// dgJacobian y1;
// simd_type tmp0;
// simd_type tmp1;
// simd_type tmp2;
// simd_type tmp3;
simd_type akNumSimd;
simd_type accNormSimd;
simd_type maxPassesSimd;
simd_type y0_linear;
simd_type y0_angular;
simd_type y1_linear;
simd_type y1_angular;
simd_type one;
simd_type zero;
simd_type signMask;
simd_type tol_pos_1eNeg5;
simd_type tol_pos_1eNeg8;
simd_type tol_neg_1eNeg16;
simd_type tol_pos_1eNeg16;
simd_type deltaAccelPtr[DG_CONSTRAINT_MAX_ROWS / DG_SIMD_WORD_SIZE];
simd_type deltaForcePtr[DG_CONSTRAINT_MAX_ROWS / DG_SIMD_WORD_SIZE];
simd_type activeRowPtr[DG_CONSTRAINT_MAX_ROWS / DG_SIMD_WORD_SIZE];
simd_type lowBoundPtr[DG_CONSTRAINT_MAX_ROWS / DG_SIMD_WORD_SIZE];
simd_type highBoundPtr[DG_CONSTRAINT_MAX_ROWS / DG_SIMD_WORD_SIZE];
dgFloat32 *const deltaAccel = (dgFloat32 *)deltaAccelPtr;
dgFloat32 *const deltaForce = (dgFloat32 *)deltaForcePtr;
dgFloat32 *const activeRow = (dgFloat32 *)activeRowPtr;
dgFloat32 *const lowBound = (dgFloat32 *)lowBoundPtr;
dgFloat32 *const highBound = (dgFloat32 *)highBoundPtr;
dgFloat32 *const accel = m_accel;
dgFloat32 *const force = m_force;
const dgJacobianPair *const Jt = m_Jt;
const dgJacobianPair *const JMinv = m_JMinv;
const dgFloat32 *const diagDamp = m_diagDamp;
const dgFloat32 *const invDJMinvJt = m_invDJMinvJt;
// const dgFloat32* const lowerFrictionForce = m_lowerBoundFrictionCoefficent;
// const dgFloat32* const upperFrictionForce = m_upperBoundFrictionCoefficent;
const dgFloat32 *const lowerFrictionCoef = m_lowerBoundFrictionCoefficent;
const dgFloat32 *const upperFrictionCoef = m_upperBoundFrictionCoefficent;
const dgInt32 *const normalForceIndex = m_normalForceIndex;
const dgJointInfo *const constraintArray = m_constraintArray;
// const dgJacobianIndex* const jacobianIndexArray = m_jacobianIndexArray;
// count = 0x7fffffff;
// signMask = simd_set1((dgFloat32&) count);
tmpIndex.m_integer.m_iVal = 0x7fffffff;
signMask = simd_set1(tmpIndex.m_fVal);
one = simd_set1(dgFloat32(1.0f));
zero = simd_set1(dgFloat32(0.0f));
tol_pos_1eNeg8 = simd_set1(dgFloat32(1.0e-8f));
tol_pos_1eNeg5 = simd_set1(dgFloat32(1.0e-5f));
tol_pos_1eNeg16 = simd_set1(dgFloat32(1.0e-16f));
tol_neg_1eNeg16 = simd_set1(dgFloat32(-1.0e-16f));
first = constraintArray[joint].m_autoPairstart;
count = constraintArray[joint].m_autoPaircount;
m0 = constraintArray[joint].m_m0;
m1 = constraintArray[joint].m_m1;
roundCount = count & (-DG_SIMD_WORD_SIZE);
if (roundCount != count) {
roundCount += 4;
for (dgInt32 j = count; j < roundCount; j++) {
dgInt32 i;
i = first + j;
// force[i] = dgFloat32 (0.0f);
// accel[j] -= dgFloat32 (0.0f);
// activeRow[j] = dgFloat32 (0.0f);
// deltaAccel[j] = dgFloat32 (0.0f);
// deltaForce[j] = dgFloat32 (0.0f);
m_normalForceIndex[i] = -1;
simd_store_s(zero, &force[i]);
simd_store_s(zero, &accel[j]);
simd_store_s(zero, &activeRow[j]);
simd_store_s(zero, &deltaAccel[j]);
simd_store_s(zero, &deltaForce[j]);
simd_store_s(one, &m_lowerBoundFrictionCoefficent[i]);
simd_store_s(zero, &m_upperBoundFrictionCoefficent[i]);
}
}
// akNum = dgFloat32 (0.0f);
// accNorm = dgFloat32(0.0f);
// maxPasses = count;
akNumSimd = zero;
accNormSimd = zero;
maxPassesSimd = zero;
// tmp3 = simd_set1(dgFloat32 (1.0f));
// for (j = 0; j < count; j ++) {
for (dgInt32 j = 0; j < roundCount; j += DG_SIMD_WORD_SIZE) {
dgInt32 i;
// dgInt32 k0;
// dgInt32 k1;
// dgInt32 k2;
// dgInt32 k3;
// simd_type index_k;
// simd_type accel_j;
simd_type force_k;
// simd_type force_i;
// simd_type lowBound_j;
// simd_type higntBound_j;
// simd_type deltaforce_j;
simd_type lowHighBound_test;
i = first + j;
// k = bilateralForceBounds[i].m_normalIndex;
// k0 = normalForceIndex[i + 0];
// k1 = normalForceIndex[i + 1];
// k2 = normalForceIndex[i + 2];
// k3 = normalForceIndex[i + 3];
// val = (k >= 0) ? force[k] : dgFloat32 (1.0f);
// val = force[k];
// index_k = simd_move_lh_v (simd_pack_lo_v (simd_load_is (zero, k0), simd_load_is (zero, k1)), simd_pack_lo_v (simd_load_is (zero, k2), simd_load_is (zero, k3)));
// index_k = simd_cmpge_v (index_k, zero);
force_k =
simd_move_lh_v(simd_pack_lo_v(simd_load_s(force[normalForceIndex[i + 0]]), simd_load_s(force[normalForceIndex[i + 1]])), simd_pack_lo_v(simd_load_s(force[normalForceIndex[i + 2]]), simd_load_s(force[normalForceIndex[i + 3]])));
// force_k = simd_or_v (simd_and_v(force_k, index_k), simd_andnot_v (one, index_k));
// lowBound[j] = val * bilateralForceBounds[i].m_low;
// highBound[j] = val * bilateralForceBounds[i].m_upper;
(simd_type &)lowBound[j] =
simd_mul_v(force_k, (simd_type &)lowerFrictionCoef[i]);
(simd_type &)highBound[j] =
simd_mul_v(force_k, (simd_type &)upperFrictionCoef[i]);
// activeRow[j] = dgFloat32 (1.0f);
// forceStep[j] = m_force[i];
// if (force[i] < lowBound[j]) {
// maxPasses --;
// force[i] = lowBound[j];
// activeRow[j] = dgFloat32 (0.0f);
// } else if (force[i] > highBound[j]) {
// maxPasses --;
// force[i] = highBound[j];
// activeRow[j] = dgFloat32 (0.0f);
// }
(simd_type &)forceStep[j] = (simd_type &)force[i];
// lowBound_test = simd_cmplt_v (force_i, lowBound_j);
// higntBound_test = simd_cmpgt_v (force_i, higntBound_j);
lowHighBound_test =
simd_or_v(simd_cmplt_v((simd_type &)force[i], (simd_type &)lowBound[j]), simd_cmpgt_v((simd_type &)force[i], (simd_type &)highBound[j]));
(simd_type &)activeRow[j] = simd_andnot_v(one, lowHighBound_test);
maxPassesSimd = simd_add_v(maxPassesSimd, (simd_type &)activeRow[j]);
// force_k = simd_or_v (simd_and_v (lowBound_j, lowBound_test), simd_and_v (higntBound_j, higntBound_test));
//(simd_type&)force[i] = simd_mul_v (activeRow_j, simd_or_v (simd_and_v (force_k, lowHighBound_test), simd_andnot_v (force_i, lowHighBound_test)));
(simd_type &)force[i] =
simd_min_v((simd_type &)highBound[j], simd_max_v((simd_type &)force[i], (simd_type &)lowBound[j]));
// deltaForce[j] = accel[j] * invDJMinvJt[i] * activeRow[j];
(simd_type &)deltaForce[j] =
simd_mul_v((simd_type &)accel[j], simd_mul_v((simd_type &)invDJMinvJt[i], (simd_type &)activeRow[j]));
// akNum += accel[j] * deltaForce[j];
akNumSimd =
simd_mul_add_v(akNumSimd, (simd_type &)accel[j], (simd_type &)deltaForce[j]);
// accNorm = GetMax (dgAbsf (accel[j] * activeRow[j]), accNorm);
accNormSimd =
simd_max_v(accNormSimd, simd_and_v(simd_mul_v((simd_type &)accel[j], (simd_type &)activeRow[j]), signMask));
}
akNumSimd = simd_add_v(akNumSimd, simd_move_hl_v(akNumSimd, akNumSimd));
simd_store_s(
simd_add_s(akNumSimd, simd_permut_v(akNumSimd, akNumSimd, PURMUT_MASK(0, 0, 0, 1))),
&akNum);
accNormSimd =
simd_max_v(accNormSimd, simd_move_hl_v(accNormSimd, accNormSimd));
simd_store_s(
simd_max_s(accNormSimd, simd_permut_v(accNormSimd, accNormSimd, PURMUT_MASK(0, 0, 0, 1))),
&accNorm);
maxPassesSimd =
simd_add_v(maxPassesSimd, simd_move_hl_v(maxPassesSimd, maxPassesSimd));
maxPasses =
simd_store_is(simd_add_s(maxPassesSimd, simd_permut_v(maxPassesSimd, maxPassesSimd, PURMUT_MASK(0, 0, 0, 1))));
retAccel = accNorm;
clampedForceIndexValue = dgFloat32(0.0f);
for (dgInt32 i = 0; (i < maxPasses) && (accNorm > maxAccNorm); i++) {
simd_type akSimd;
simd_type akDenSimd;
// y0.m_linear = zero;
// y0.m_angular = zero;
// y1.m_linear = zero;
// y1.m_angular = zero;
y0_linear = zero;
y0_angular = zero;
y1_linear = zero;
y1_angular = zero;
for (dgInt32 j = 0; j < count; j++) {
dgInt32 k;
simd_type tmp1;
k = j + first;
// ak = deltaForce[j];
tmp1 = simd_set1(deltaForce[j]);
// y0.m_linear += m_Jt[k].m_jacobian_IM0.m_linear.Scale (ak);
// y0.m_angular += m_Jt[k].m_jacobian_IM0.m_angular.Scale (ak);
// y1.m_linear += m_Jt[k].m_jacobian_IM1.m_linear.Scale (ak);
// y1.m_angular += m_Jt[k].m_jacobian_IM1.m_angular.Scale (ak);
y0_linear =
simd_mul_add_v(y0_linear, (simd_type &)Jt[k].m_jacobian_IM0.m_linear, tmp1);
y0_angular =
simd_mul_add_v(y0_angular, (simd_type &)Jt[k].m_jacobian_IM0.m_angular, tmp1);
y1_linear =
simd_mul_add_v(y1_linear, (simd_type &)Jt[k].m_jacobian_IM1.m_linear, tmp1);
y1_angular =
simd_mul_add_v(y1_angular, (simd_type &)Jt[k].m_jacobian_IM1.m_angular, tmp1);
}
// akDen = dgFloat32 (0.0f);
akDenSimd = zero;
for (dgInt32 j = 0; j < count; j++) {
dgInt32 k;
simd_type tmp1;
k = j + first;
// dgVector acc (m_JMinv[k].m_jacobian_IM0.m_linear.CompProduct(y0.m_linear));
// acc += m_JMinv[k].m_jacobian_IM0.m_angular.CompProduct(y0.m_angular);
// acc += m_JMinv[k].m_jacobian_IM1.m_linear.CompProduct(y1.m_linear);
// acc += m_JMinv[k].m_jacobian_IM1.m_angular.CompProduct(y1.m_angular);
tmp1 =
simd_mul_v((simd_type &)JMinv[k].m_jacobian_IM0.m_linear, y0_linear);
tmp1 =
simd_mul_add_v(tmp1, (simd_type &)JMinv[k].m_jacobian_IM0.m_angular, y0_angular);
tmp1 =
simd_mul_add_v(tmp1, (simd_type &)JMinv[k].m_jacobian_IM1.m_linear, y1_linear);
tmp1 =
simd_mul_add_v(tmp1, (simd_type &)JMinv[k].m_jacobian_IM1.m_angular, y1_angular);
// deltaAccel[j] = acc.m_x + acc.m_y + acc.m_z + deltaForce[j] * m_diagDamp[k];
tmp1 = simd_add_v(tmp1, simd_move_hl_v(tmp1, tmp1));
tmp1 =
simd_add_s(tmp1, simd_permut_v(tmp1, tmp1, PURMUT_MASK(3, 3, 3, 1)));
tmp1 =
simd_mul_add_s(tmp1, simd_load_s(deltaForce[j]), simd_load_s(diagDamp[k]));
simd_store_s(tmp1, &deltaAccel[j]);
// akDen += deltaAccel[j] * deltaForce[j];
akDenSimd = simd_mul_add_s(akDenSimd, tmp1, simd_load_s(deltaForce[j]));
}
//NEWTON_ASSERT (akDen > dgFloat32 (0.0f));
// akDen = GetMax (akDen, dgFloat32(1.0e-16f));
//NEWTON_ASSERT (dgAbsf (akDen) >= dgFloat32(1.0e-16f));
// ak = akNum / akDen;
akSimd =
simd_div_s(simd_load_s(akNum), simd_max_s(akDenSimd, tol_pos_1eNeg16));
// simd_store_s (tmp0, &ak);
// clampedForceIndex = -1;
simd_type min_index;
simd_type minClampIndex;
simd_type min_index_step;
simd_type campedIndexValue;
campedIndexValue = zero;
minClampIndex = simd_set1(dgFloat32(-1.0f));
min_index_step = simd_set1(dgFloat32(4.0f));
akSimd = simd_permut_v(akSimd, akSimd, PURMUT_MASK(0, 0, 0, 0));
min_index =
simd_set(dgFloat32(0.0f), dgFloat32(1.0f), dgFloat32(2.0f), dgFloat32(3.0f));
// for (j = 0; j < roundCount; j ++) {
for (dgInt32 j = 0; j < roundCount; j += DG_SIMD_WORD_SIZE) {
// if (activeRow[j]) {
// dgFloat32 val;
// k = j + first;
// if (deltaForce[j] < dgFloat32 (-1.0e-16f)) {
// val = force[k] + ak * deltaForce[j];
// if (val < lowBound[j]) {
// ak = (lowBound[j] - force[k]) / deltaForce[j];
// clampedForceIndex = j;
// clampedForceIndexValue = lowBound[j];
// if (ak < dgFloat32 (1.0e-8f)) {
// ak = dgFloat32 (0.0f);
// break;
// }
// }
// } else if (deltaForce[j] > dgFloat32 (1.0e-16f)) {
// val = force[k] + ak * deltaForce[j];
// if (val > highBound[j]) {
// ak = (highBound[j] - force[k]) / deltaForce[j];
// clampedForceIndex = j;
// clampedForceIndexValue = highBound[j];
// if (ak < dgFloat32 (1.0e-8f)) {
// ak = dgFloat32 (0.0f);
// break;
// }
// }
// }
// }
//
// bool test;
// bool negTest;
// bool posTest;
// bool negValTest;
// bool posValTest;
// bool negDeltaForceTest;
// bool posDeltaForceTest;
// dgFloat32 val;
// dgFloat32 num;
// dgFloat32 den;
dgInt32 k;
simd_type val;
simd_type num;
simd_type den;
simd_type test;
simd_type negTest;
simd_type posTest;
simd_type negValTest;
simd_type posValTest;
simd_type negDeltaForceTest;
simd_type posDeltaForceTest;
// Make sure AK is not negative
k = j + first;
// val = force[k] + ak * deltaForce[j];
val =
simd_mul_add_v((simd_type &)force[k], akSimd, (simd_type &)deltaForce[j]);
// negValTest = val < lowBound[j];
negValTest = simd_cmplt_v(val, (simd_type &)lowBound[j]);
// posValTest = val > highBound[j];
posValTest = simd_cmpgt_v(val, (simd_type &)highBound[j]);
// negDeltaForceTest = deltaForce[j] < dgFloat32 (-1.0e-16f);
negDeltaForceTest =
simd_cmplt_v((simd_type &)deltaForce[j], tol_neg_1eNeg16);
// posDeltaForceTest = deltaForce[j] > dgFloat32 ( 1.0e-16f);
posDeltaForceTest =
simd_cmpgt_v((simd_type &)deltaForce[j], tol_pos_1eNeg16);
// negTest = negValTest & negDeltaForceTest;
negTest = simd_and_v(negValTest, negDeltaForceTest);
// posTest = posValTest & posDeltaForceTest;
posTest = simd_and_v(posValTest, posDeltaForceTest);
// test = negTest | posTest;
test = simd_or_v(negTest, posTest);
// num = negTest ? lowBound[j] : (posTest ? highBound[j] : force[k]);
num =
simd_or_v(simd_and_v((simd_type &)lowBound[j], negTest), simd_and_v((simd_type &)highBound[j], posTest));
num =
simd_or_v(simd_and_v(num, test), simd_andnot_v((simd_type &)force[k], test));
// den = test ? deltaForce[j] : dgFloat32 (1.0f);
den =
simd_or_v(simd_and_v((simd_type &)deltaForce[j], test), simd_andnot_v(one, test));
// test = test & (activeRow[j] > dgFloat32 (0.0f));
test = simd_and_v(test, simd_cmpgt_v((simd_type &)activeRow[j], zero));
//NEWTON_ASSERT (dgAbsf (den) > 1.0e-16f);
// ak = test ? (num - force[k]) / den : ak;
akSimd =
simd_or_v(simd_div_v(simd_sub_v(num, (simd_type &)force[k]), den), simd_andnot_v(akSimd, test));
// ak = (ak < dgFloat32 (1.0e-8f)) ? dgFloat32 (0.0f) : ak;
akSimd = simd_and_v(akSimd, simd_cmpgt_v(akSimd, tol_pos_1eNeg8));
// clampedForceIndex = test ? j : clampedForceIndex;
minClampIndex =
simd_or_v(simd_and_v(min_index, test), simd_andnot_v(minClampIndex, test));
min_index = simd_add_v(min_index, min_index_step);
// clampedForceIndexValue = test ? num : clampedForceIndexValue;
campedIndexValue =
simd_or_v(simd_and_v(num, test), simd_andnot_v(campedIndexValue, test));
}
akDenSimd = simd_move_hl_v(akSimd, akSimd);
maxPassesSimd = simd_cmplt_v(akSimd, akDenSimd);
akSimd = simd_min_v(akSimd, akDenSimd);
minClampIndex =
simd_or_v(simd_and_v(minClampIndex, maxPassesSimd), simd_andnot_v(simd_move_hl_v(minClampIndex, minClampIndex), maxPassesSimd));
campedIndexValue =
simd_or_v(simd_and_v(campedIndexValue, maxPassesSimd), simd_andnot_v(simd_move_hl_v(campedIndexValue, campedIndexValue), maxPassesSimd));
akDenSimd = simd_permut_v(akSimd, akSimd, PURMUT_MASK(0, 0, 0, 1));
maxPassesSimd = simd_cmplt_s(akSimd, akDenSimd);
akSimd = simd_min_s(akSimd, akDenSimd);
minClampIndex =
simd_or_v(simd_and_v(minClampIndex, maxPassesSimd), simd_andnot_v(simd_permut_v(minClampIndex, minClampIndex, PURMUT_MASK(0, 0, 0, 1)), maxPassesSimd));
campedIndexValue =
simd_or_v(simd_and_v(campedIndexValue, maxPassesSimd), simd_andnot_v(simd_permut_v(campedIndexValue, campedIndexValue, PURMUT_MASK(0, 0, 0, 1)), maxPassesSimd));
// tmp2 = zero;
simd_store_s(akSimd, &ak);
clampedForceIndex = simd_store_is(minClampIndex);
simd_store_s(campedIndexValue, &clampedForceIndexValue);
if (ak == dgFloat32(0.0f) && (clampedForceIndex != -1)) {
NEWTON_ASSERT(clampedForceIndex != -1);
// akNum = dgFloat32 (0.0f);
// accNorm = dgFloat32(0.0f);
akNumSimd = zero;
accNormSimd = zero;
maxPassesSimd = zero;
activeRow[clampedForceIndex] = dgFloat32(0.0f);
deltaForce[clampedForceIndex] = dgFloat32(0.0f);
force[clampedForceIndex + first] = clampedForceIndexValue;
// for (j = 0; j < count; j ++) {
for (dgInt32 j = 0; j < roundCount; j += DG_SIMD_WORD_SIZE) {
// for (j = 0; j < roundCount; j ++) {
// if (((dgFloat32*)activeRow)[j]) {
// bool test0;
// bool test1;
// k = j + first;
// val = ((dgFloat32*)lowBound)[j] - force[k];
// if ((dgAbsf (val) < dgFloat32 (1.0e-5f)) && (accel[j] < dgFloat32 (0.0f))) {
// force[k] = lowBound[j];
// activeRow[j] = dgFloat32 (0.0f);
// deltaForce[j] = dgFloat32 (0.0f);
//
// } else {
// val = highBound[j] - force[k];
// if ((dgAbsf (val) < dgFloat32 (1.0e-5f)) && (accel[j] > dgFloat32 (0.0f))) {
// force[k] = highBound[j];
// activeRow[j] = dgFloat32 (0.0f);
// deltaForce[j] = dgFloat32 (0.0f);
// } else {
// NEWTON_ASSERT (activeRow[j] > dgFloat32 (0.0f));
// deltaForce[j] = accel[j] * invDJMinvJt[k];
// akNum += accel[j] * deltaForce[j];
// accNorm = GetMax (dgAbsf (accel[j]), accNorm);
// }
// }
// }
dgInt32 k;
simd_type val_k;
simd_type test_0;
simd_type test_1;
// simd_type test_2;
simd_type accel_k;
simd_type force_k;
k = j + first;
accel_k = (simd_type &)accel[j];
force_k = (simd_type &)force[k];
// val = dgAbsf (lowBound[j] - force[k]);
val_k =
simd_and_v(simd_sub_v((simd_type &)lowBound[j], force_k), signMask);
// test0 = (val < dgFloat32 (1.0e-5f)) & (accel[j] < dgFloat32 (0.0f));
test_0 =
simd_and_v(simd_cmplt_v(val_k, tol_pos_1eNeg5), simd_cmplt_v(accel_k, zero));
// val = dgAbsf (highBound[j] - force[k]);
val_k =
simd_and_v(simd_sub_v((simd_type &)highBound[j], force_k), signMask);
// test1 = (val < dgFloat32 (1.0e-5f)) & (accel[j] > dgFloat32 (0.0f));
test_1 =
simd_and_v(simd_cmplt_v(val_k, tol_pos_1eNeg5), simd_cmpgt_v(accel_k, zero));
// force[k] = test0 ? lowBound[j] : (test1 ? highBound[j] : force[k]);
// val_k = simd_or_v (simd_and_v ((simd_type&)lowBound[j], test_0), simd_and_v ((simd_type&)highBound[j], test_1));
//(simd_type&) force[k] = simd_or_v (simd_and_v (val_k, test_2) , simd_andnot_v (force_k, test_2));
(simd_type &)force[k] =
simd_min_v((simd_type &)highBound[j], simd_max_v((simd_type &)force[k], (simd_type &)lowBound[j]));
// activeRow[j] *= (test0 | test1) ? dgFloat32 (0.0f) : dgFloat32 (1.0f);
// test_2 = simd_or_v (test_0, test_1);
(simd_type &)activeRow[j] =
simd_mul_v((simd_type &)activeRow[j], simd_andnot_v(one, simd_or_v(test_0, test_1)));
// deltaForce[j] = accel[j] * invDJMinvJt[k] * activeRow[j];
(simd_type &)deltaForce[j] =
simd_mul_v(accel_k, simd_mul_v((simd_type &)invDJMinvJt[k], (simd_type &)activeRow[j]));
// akNum += accel[j] * deltaForce[j];
akNumSimd =
simd_mul_add_v(akNumSimd, (simd_type &)deltaForce[j], accel_k);
// accNorm = GetMax (dgAbsf (accel[j] * activeRow[j]), accNorm);
accNormSimd =
simd_max_v(accNormSimd, simd_and_v(simd_mul_v(accel_k, (simd_type &)activeRow[j]), signMask));
// masPases += 1;
maxPassesSimd = simd_add_v(maxPassesSimd, (simd_type &)activeRow[j]);
}
NEWTON_ASSERT(activeRow[clampedForceIndex] == dgFloat32(0.0f));
akNumSimd = simd_add_v(akNumSimd, simd_move_hl_v(akNumSimd, akNumSimd));
akNumSimd =
simd_add_s(akNumSimd, simd_permut_v(akNumSimd, akNumSimd, PURMUT_MASK(0, 0, 0, 1)));
simd_store_s(akNumSimd, &akNum);
accNormSimd =
simd_max_v(accNormSimd, simd_move_hl_v(accNormSimd, accNormSimd));
accNormSimd =
simd_max_s(accNormSimd, simd_permut_v(accNormSimd, accNormSimd, PURMUT_MASK(0, 0, 0, 1)));
simd_store_s(accNormSimd, &accNorm);
i = -1;
// maxPasses = GetMax (maxPasses - 1, 1);
maxPassesSimd =
simd_add_v(maxPassesSimd, simd_move_hl_v(maxPassesSimd, maxPassesSimd));
maxPasses =
simd_store_is(simd_add_s(maxPassesSimd, simd_permut_v(maxPassesSimd, maxPassesSimd, PURMUT_MASK(0, 0, 0, 1))));
} else if (clampedForceIndex >= 0) {
// akNum = dgFloat32(0.0f);
// accNorm = dgFloat32(0.0f);
akNumSimd = zero;
accNormSimd = zero;
// tmp2 = zero;
akSimd = simd_permut_v(akSimd, akSimd, PURMUT_MASK(0, 0, 0, 0));
activeRow[clampedForceIndex] = dgFloat32(0.0f);
// for (j = 0; j < count; j ++) {
for (dgInt32 j = 0; j < roundCount; j += DG_SIMD_WORD_SIZE) {
dgInt32 k;
k = j + first;
// m_force[k] += ak * deltaForce[j];
// m_accel[j] -= ak * deltaAccel[j];
(simd_type &)force[k] =
simd_mul_add_v((simd_type &)force[k], akSimd, (simd_type &)deltaForce[j]);
(simd_type &)accel[j] =
simd_mul_sub_v((simd_type &)accel[j], akSimd, (simd_type &)deltaAccel[j]);
// accNorm = GetMax (dgAbsf (m_accel[j] * activeRow[j]), accNorm);
accNormSimd =
simd_max_v(accNormSimd, simd_and_v(simd_mul_v((simd_type &)accel[j], (simd_type &)activeRow[j]), signMask));
//NEWTON_ASSERT (dgCheckFloat(m_force[k]));
//NEWTON_ASSERT (dgCheckFloat(m_accel[j]));
// deltaForce[j] = m_accel[j] * m_invDJMinvJt[k] * activeRow[j];
(simd_type &)deltaForce[j] =
simd_mul_v((simd_type &)accel[j], simd_mul_v((simd_type &)invDJMinvJt[k], (simd_type &)activeRow[j]));
// akNum += deltaForce[j] * m_accel[j];
akNumSimd =
simd_mul_add_v(akNumSimd, (simd_type &)deltaForce[j], (simd_type &)accel[j]);
}
akNumSimd = simd_add_v(akNumSimd, simd_move_hl_v(akNumSimd, akNumSimd));
akNumSimd =
simd_add_s(akNumSimd, simd_permut_v(akNumSimd, akNumSimd, PURMUT_MASK(0, 0, 0, 1)));
simd_store_s(akNumSimd, &akNum);
accNormSimd =
simd_max_v(accNormSimd, simd_move_hl_v(accNormSimd, accNormSimd));
accNormSimd =
simd_max_s(accNormSimd, simd_permut_v(accNormSimd, accNormSimd, PURMUT_MASK(0, 0, 0, 1)));
simd_store_s(accNormSimd, &accNorm);
force[clampedForceIndex + first] = clampedForceIndexValue;
i = -1;
maxPasses = GetMax(maxPasses - 1, 1);
} else {
// accNorm = dgFloat32(0.0f);
// tmp2 = zero;
accNormSimd = zero;
akSimd = simd_permut_v(akSimd, akSimd, PURMUT_MASK(0, 0, 0, 0));
// for (j = 0; j < count; j ++) {
for (dgInt32 j = 0; j < roundCount; j += DG_SIMD_WORD_SIZE) {
dgInt32 k;
k = j + first;
// m_force[k] += ak * deltaForce[j];
// m_accel[j] -= ak * deltaAccel[j];
(simd_type &)force[k] =
simd_mul_add_v((simd_type &)force[k], akSimd, (simd_type &)deltaForce[j]);
(simd_type &)accel[j] =
simd_mul_sub_v((simd_type &)accel[j], akSimd, (simd_type &)deltaAccel[j]);
// accNorm = GetMax (dgAbsf (m_accel[j] * activeRow[j]), accNorm);
accNormSimd =
simd_max_v(accNormSimd, simd_and_v(simd_mul_v((simd_type &)accel[j], (simd_type &)activeRow[j]), signMask));
//NEWTON_ASSERT (dgCheckFloat(m_force[k]));
//NEWTON_ASSERT (dgCheckFloat(m_accel[j]));
}
accNormSimd =
simd_max_v(accNormSimd, simd_move_hl_v(accNormSimd, accNormSimd));
accNormSimd =
simd_max_s(accNormSimd, simd_permut_v(accNormSimd, accNormSimd, PURMUT_MASK(0, 0, 0, 1)));
simd_store_s(accNormSimd, &accNorm);
if (accNorm > maxAccNorm) {
// akDen = akNum;
// akNum = dgFloat32(0.0f);
akDenSimd = simd_set1(akNum);
akNumSimd = zero;
// for (j = 0; j < count; j ++) {
for (dgInt32 j = 0; j < roundCount; j += DG_SIMD_WORD_SIZE) {
dgInt32 k;
k = j + first;
// deltaAccel[j] = m_accel[j] * m_invDJMinvJt[k] * activeRow[j];
(simd_type &)deltaAccel[j] =
simd_mul_v((simd_type &)accel[j], simd_mul_v((simd_type &)invDJMinvJt[k], (simd_type &)activeRow[j]));
// akNum += m_accel[j] * deltaAccel[j];
akNumSimd =
simd_mul_add_v(akNumSimd, (simd_type &)accel[j], (simd_type &)deltaAccel[j]);
}
akNumSimd = simd_add_v(akNumSimd, simd_move_hl_v(akNumSimd, akNumSimd));
akNumSimd =
simd_add_s(akNumSimd, simd_permut_v(akNumSimd, akNumSimd, PURMUT_MASK(0, 0, 0, 1)));
simd_store_s(akNumSimd, &akNum);
//NEWTON_ASSERT (akDen > dgFloat32(0.0f));
// akDen = GetMax (akDen, dgFloat32 (1.0e-17f));
akDenSimd = simd_max_s(akDenSimd, simd_set1(dgFloat32(1.0e-17f)));
// ak = dgFloat32 (akNum / akDen);
akSimd = simd_div_s(akSimd, akDenSimd);
akSimd = simd_permut_v(akSimd, akSimd, PURMUT_MASK(0, 0, 0, 0));
// for (j = 0; j < count; j ++) {
for (dgInt32 j = 0; j < roundCount; j += DG_SIMD_WORD_SIZE) {
// deltaForce[j] = deltaAccel[j] + ak * deltaForce[j];
(simd_type &)deltaForce[j] =
simd_mul_add_v((simd_type &)deltaAccel[j], akSimd, (simd_type &)deltaForce[j]);
}
}
}
}
// for (j = 0; j < count; j ++) {
for (dgInt32 j = 0; j < roundCount; j += DG_SIMD_WORD_SIZE) {
// forceStep[j] = force[j + first] - forceStep[j];
(simd_type &)forceStep[j] =
simd_sub_v((simd_type &)force[j + first], (simd_type &)forceStep[j]);
}
// tmp0 = simd_set1(DG_SSOR_FACTOR);
// for (j = 0; j < roundCount; j += DG_SIMD_WORD_SIZE) {
// tmp1 = simd_mul_add_v((simd_type&)forceStep[j], simd_sub_v ((simd_type&)force[j + first], (simd_type&)forceStep[j]), tmp0);
// (simd_type&)force[j + first] = simd_min_v ((simd_type&) highBound[j], simd_max_v(tmp1, (simd_type&)lowBound[j]));
// (simd_type&)forceStep[j] = simd_sub_v ((simd_type&)force[j + first], (simd_type&)forceStep[j]);
// }
return retAccel;
#else
return dgFloat32(0.0f);
#endif
}
dgFloat32 dgJacobianMemory::CalculateJointForces(dgInt32 joint,
dgFloat32 *forceStep, dgFloat32 maxAccNorm) const {
dgInt32 first;
dgInt32 count;
dgInt32 maxPasses;
dgInt32 clampedForceIndex;
dgFloat32 ak;
dgFloat32 val;
dgFloat32 akNum;
dgFloat32 akDen;
dgFloat32 accNorm;
dgFloat32 retAccel;
dgFloat32 clampedForceIndexValue;
dgJacobian y0;
dgJacobian y1;
dgFloat32 deltaAccel[DG_CONSTRAINT_MAX_ROWS];
dgFloat32 deltaForce[DG_CONSTRAINT_MAX_ROWS];
dgFloat32 activeRow[DG_CONSTRAINT_MAX_ROWS];
dgFloat32 lowBound[DG_CONSTRAINT_MAX_ROWS];
dgFloat32 highBound[DG_CONSTRAINT_MAX_ROWS];
dgFloat32 *const accel = m_accel;
dgFloat32 *const force = m_force;
const dgJacobianPair *const Jt = m_Jt;
const dgJacobianPair *const JMinv = m_JMinv;
const dgFloat32 *const diagDamp = m_diagDamp;
const dgFloat32 *const invDJMinvJt = m_invDJMinvJt;
const dgInt32 *const normalForceIndex = m_normalForceIndex;
const dgJointInfo *const constraintArray = m_constraintArray;
const dgFloat32 *const lowerFriction = m_lowerBoundFrictionCoefficent;
const dgFloat32 *const upperFriction = m_upperBoundFrictionCoefficent;
// dgBilateralBounds* const bilateralForceBounds = m_bilateralForceBounds;
// const dgJacobianIndex* const jacobianIndexArray = m_jacobianIndexArray;
first = constraintArray[joint].m_autoPairstart;
count = constraintArray[joint].m_autoPaircount;
akNum = dgFloat32(0.0f);
accNorm = dgFloat32(0.0f);
maxPasses = count;
for (dgInt32 j = 0; j < count; j++) {
dgInt32 i;
dgInt32 k;
i = first + j;
// k = bilateralForceBounds[i].m_normalIndex;
k = normalForceIndex[i];
// val = (k >= 0) ? force[k] : dgFloat32 (1.0f);
NEWTON_ASSERT(
((k < 0) && (force[k] == dgFloat32(1.0f))) || ((k >= 0) && (force[k] >= dgFloat32(0.0f))));
val = force[k];
lowBound[j] = val * lowerFriction[i];
highBound[j] = val * upperFriction[i];
activeRow[j] = dgFloat32(1.0f);
forceStep[j] = force[i];
if (force[i] < lowBound[j]) {
maxPasses--;
force[i] = lowBound[j];
activeRow[j] = dgFloat32(0.0f);
} else if (force[i] > highBound[j]) {
maxPasses--;
force[i] = highBound[j];
activeRow[j] = dgFloat32(0.0f);
}
deltaForce[j] = accel[j] * invDJMinvJt[i] * activeRow[j];
akNum += accel[j] * deltaForce[j];
accNorm = GetMax(dgAbsf(accel[j] * activeRow[j]), accNorm);
}
retAccel = accNorm;
clampedForceIndexValue = dgFloat32(0.0f);
dgVector zero(dgFloat32(0.0f), dgFloat32(0.0f), dgFloat32(0.0f),
dgFloat32(0.0f));
for (dgInt32 i = 0; (i < maxPasses) && (accNorm > maxAccNorm); i++) {
y0.m_linear = zero;
y0.m_angular = zero;
y1.m_linear = zero;
y1.m_angular = zero;
for (dgInt32 j = 0; j < count; j++) {
dgInt32 k;
k = j + first;
ak = deltaForce[j];
y0.m_linear += Jt[k].m_jacobian_IM0.m_linear.Scale(ak);
y0.m_angular += Jt[k].m_jacobian_IM0.m_angular.Scale(ak);
y1.m_linear += Jt[k].m_jacobian_IM1.m_linear.Scale(ak);
y1.m_angular += Jt[k].m_jacobian_IM1.m_angular.Scale(ak);
}
akDen = dgFloat32(0.0f);
for (dgInt32 j = 0; j < count; j++) {
dgInt32 k;
k = j + first;
dgVector acc(JMinv[k].m_jacobian_IM0.m_linear.CompProduct(y0.m_linear));
acc += JMinv[k].m_jacobian_IM0.m_angular.CompProduct(y0.m_angular);
acc += JMinv[k].m_jacobian_IM1.m_linear.CompProduct(y1.m_linear);
acc += JMinv[k].m_jacobian_IM1.m_angular.CompProduct(y1.m_angular);
deltaAccel[j] = acc.m_x + acc.m_y + acc.m_z + deltaForce[j] * diagDamp[k];
akDen += deltaAccel[j] * deltaForce[j];
}
NEWTON_ASSERT(akDen > dgFloat32(0.0f));
akDen = GetMax(akDen, dgFloat32(1.0e-16f));
NEWTON_ASSERT(dgAbsf(akDen) >= dgFloat32(1.0e-16f));
ak = akNum / akDen;
clampedForceIndex = -1;
for (dgInt32 j = 0; j < count; j++) {
if (activeRow[j]) {
dgInt32 k;
k = j + first;
if (deltaForce[j] < dgFloat32(-1.0e-16f)) {
val = force[k] + ak * deltaForce[j];
if (val < lowBound[j]) {
ak = GetMax((lowBound[j] - force[k]) / deltaForce[j],
dgFloat32(0.0f));
clampedForceIndex = j;
clampedForceIndexValue = lowBound[j];
if (ak < dgFloat32(1.0e-8f)) {
ak = dgFloat32(0.0f);
break;
}
}
} else if (deltaForce[j] > dgFloat32(1.0e-16f)) {
val = force[k] + ak * deltaForce[j];
if (val > highBound[j]) {
ak = GetMax((highBound[j] - force[k]) / deltaForce[j],
dgFloat32(0.0f));
clampedForceIndex = j;
clampedForceIndexValue = highBound[j];
if (ak < dgFloat32(1.0e-8f)) {
ak = dgFloat32(0.0f);
break;
}
}
}
}
}
// if (ak == dgFloat32 (0.0f)) {
if (ak == dgFloat32(0.0f) && (clampedForceIndex != -1)) {
NEWTON_ASSERT(clampedForceIndex != -1);
akNum = dgFloat32(0.0f);
accNorm = dgFloat32(0.0f);
activeRow[clampedForceIndex] = dgFloat32(0.0f);
deltaForce[clampedForceIndex] = dgFloat32(0.0f);
force[clampedForceIndex + first] = clampedForceIndexValue;
for (dgInt32 j = 0; j < count; j++) {
if (activeRow[j]) {
dgInt32 k;
k = j + first;
val = lowBound[j] - force[k];
if ((dgAbsf(val) < dgFloat32(1.0e-5f)) && (accel[j] < dgFloat32(0.0f))) {
force[k] = lowBound[j];
activeRow[j] = dgFloat32(0.0f);
deltaForce[j] = dgFloat32(0.0f);
} else {
val = highBound[j] - force[k];
if ((dgAbsf(val) < dgFloat32(1.0e-5f)) && (accel[j] > dgFloat32(0.0f))) {
force[k] = highBound[j];
activeRow[j] = dgFloat32(0.0f);
deltaForce[j] = dgFloat32(0.0f);
} else {
NEWTON_ASSERT(activeRow[j] > dgFloat32(0.0f));
deltaForce[j] = accel[j] * invDJMinvJt[k];
akNum += accel[j] * deltaForce[j];
accNorm = GetMax(dgAbsf(accel[j]), accNorm);
}
}
}
}
NEWTON_ASSERT(activeRow[clampedForceIndex] == dgFloat32(0.0f));
i = -1;
maxPasses = GetMax(maxPasses - 1, dgInt32(1));
} else if (clampedForceIndex >= 0) {
akNum = dgFloat32(0.0f);
accNorm = dgFloat32(0.0f);
activeRow[clampedForceIndex] = dgFloat32(0.0f);
for (dgInt32 j = 0; j < count; j++) {
dgInt32 k;
k = j + first;
force[k] += ak * deltaForce[j];
accel[j] -= ak * deltaAccel[j];
accNorm = GetMax(dgAbsf(accel[j] * activeRow[j]), accNorm);
NEWTON_ASSERT(dgCheckFloat(force[k]));
NEWTON_ASSERT(dgCheckFloat(accel[j]));
deltaForce[j] = accel[j] * invDJMinvJt[k] * activeRow[j];
akNum += deltaForce[j] * accel[j];
}
force[clampedForceIndex + first] = clampedForceIndexValue;
i = -1;
maxPasses = GetMax(maxPasses - 1, dgInt32(1));
} else {
accNorm = dgFloat32(0.0f);
for (dgInt32 j = 0; j < count; j++) {
dgInt32 k;
k = j + first;
force[k] += ak * deltaForce[j];
accel[j] -= ak * deltaAccel[j];
accNorm = GetMax(dgAbsf(accel[j] * activeRow[j]), accNorm);
NEWTON_ASSERT(dgCheckFloat(force[k]));
NEWTON_ASSERT(dgCheckFloat(accel[j]));
}
if (accNorm > maxAccNorm) {
akDen = akNum;
akNum = dgFloat32(0.0f);
for (dgInt32 j = 0; j < count; j++) {
dgInt32 k;
k = j + first;
deltaAccel[j] = accel[j] * invDJMinvJt[k] * activeRow[j];
akNum += accel[j] * deltaAccel[j];
}
NEWTON_ASSERT(akDen > dgFloat32(0.0f));
akDen = GetMax(akDen, dgFloat32(1.0e-17f));
ak = dgFloat32(akNum / akDen);
for (dgInt32 j = 0; j < count; j++) {
deltaForce[j] = deltaAccel[j] + ak * deltaForce[j];
}
}
}
}
for (dgInt32 j = 0; j < count; j++) {
forceStep[j] = force[j + first] - forceStep[j];
}
return retAccel;
}
void dgJacobianMemory::CalculateForcesGameModeSimd(dgInt32 iterations,
dgFloat32 maxAccNorm) const {
#ifdef DG_BUILD_SIMD_CODE
dgFloat32 *const force = m_force;
const dgJacobianPair *const Jt = m_Jt;
const dgJacobianPair *const JMinv = m_JMinv;
const dgFloat32 *const diagDamp = m_diagDamp;
const dgFloat32 *const invDJMinvJt = m_invDJMinvJt;
const dgBodyInfo *bodyArray = m_bodyArray;
dgFloat32 *const penetration = m_penetration;
const dgFloat32 *const externAccel = m_deltaAccel;
const dgFloat32 *const restitution = m_restitution;
dgFloat32 *const coordenateAccel = m_coordenateAccel;
dgJacobian *const internalVeloc = m_internalVeloc;
dgJacobian *const internalForces = m_internalForces;
dgFloat32 **const jointForceFeeback = m_jointFeebackForce;
const dgInt32 *const normalForceIndex = m_normalForceIndex;
const dgInt32 *const accelIsMortor = m_accelIsMotor;
const dgJointInfo *const constraintArray = m_constraintArray;
const dgFloat32 *const penetrationStiffness = m_penetrationStiffness;
const dgFloat32 *const lowerFrictionCoef = m_lowerBoundFrictionCoefficent;
const dgFloat32 *const upperFrictionCoef = m_upperBoundFrictionCoefficent;
dgFloat32 invStep = (dgFloat32(1.0f) / dgFloat32(LINEAR_SOLVER_SUB_STEPS));
dgFloat32 timeStep = m_timeStep * invStep;
dgFloat32 invTimeStep = m_invTimeStep * dgFloat32(LINEAR_SOLVER_SUB_STEPS);
dgFloatSign tmpIndex;
tmpIndex.m_integer.m_iVal = 0x7fffffff;
simd_type signMask = simd_set1(tmpIndex.m_fVal);
simd_type zero = simd_set1(dgFloat32(0.0f));
for (dgInt32 i = 1; i < m_bodyCount; i++) {
dgBody *const body = m_bodyArray[i].m_body;
(simd_type &)internalVeloc[i].m_linear = zero;
(simd_type &)internalVeloc[i].m_angular = zero;
(simd_type &)internalForces[i].m_linear = zero;
(simd_type &)internalForces[i].m_angular = zero;
(simd_type &)body->m_netForce = (simd_type &)body->m_veloc;
(simd_type &)body->m_netTorque = (simd_type &)body->m_omega;
}
(simd_type &)internalVeloc[0].m_linear = zero;
(simd_type &)internalVeloc[0].m_angular = zero;
(simd_type &)internalForces[0].m_linear = zero;
(simd_type &)internalForces[0].m_angular = zero;
for (dgInt32 i = 0; i < m_jointCount; i++) {
dgInt32 first = constraintArray[i].m_autoPairstart;
dgInt32 count = constraintArray[i].m_autoPairActiveCount;
dgInt32 m0 = constraintArray[i].m_m0;
dgInt32 m1 = constraintArray[i].m_m1;
// dgJacobian y0 (internalForces[k0]);
// dgJacobian y1 (internalForces[k1]);
simd_type y0_linear = zero;
simd_type y0_angular = zero;
simd_type y1_linear = zero;
simd_type y1_angular = zero;
for (dgInt32 j = 0; j < count; j++) {
dgInt32 index = j + first;
// val = force[index];
simd_type tmp0 = simd_set1(force[index]);
// y0.m_linear += Jt[index].m_jacobian_IM0.m_linear.Scale (val);
// y0.m_angular += Jt[index].m_jacobian_IM0.m_angular.Scale (val);
// y1.m_linear += Jt[index].m_jacobian_IM1.m_linear.Scale (val);
// y1.m_angular += Jt[index].m_jacobian_IM1.m_angular.Scale (val);
y0_linear =
simd_mul_add_v(y0_linear, (simd_type &)Jt[index].m_jacobian_IM0.m_linear, tmp0);
y0_angular =
simd_mul_add_v(y0_angular, (simd_type &)Jt[index].m_jacobian_IM0.m_angular, tmp0);
y1_linear =
simd_mul_add_v(y1_linear, (simd_type &)Jt[index].m_jacobian_IM1.m_linear, tmp0);
y1_angular =
simd_mul_add_v(y1_angular, (simd_type &)Jt[index].m_jacobian_IM1.m_angular, tmp0);
}
// internalForces[k0] = y0;
// internalForces[k1] = y1;
(simd_type &)internalForces[m0].m_linear =
simd_add_v((simd_type &)internalForces[m0].m_linear, y0_linear);
(simd_type &)internalForces[m0].m_angular =
simd_add_v((simd_type &)internalForces[m0].m_angular, y0_angular);
(simd_type &)internalForces[m1].m_linear =
simd_add_v((simd_type &)internalForces[m1].m_linear, y1_linear);
(simd_type &)internalForces[m1].m_angular =
simd_add_v((simd_type &)internalForces[m1].m_angular, y1_angular);
}
simd_type timeStepSimd = simd_set1(timeStep);
dgFloat32 firstPassCoef = dgFloat32(0.0f);
dgInt32 maxPasses = iterations + DG_BASE_ITERATION_COUNT;
for (dgInt32 step = 0; step < LINEAR_SOLVER_SUB_STEPS; step++) {
for (dgInt32 curJoint = 0; curJoint < m_jointCount; curJoint++) {
dgJointAccelerationDecriptor joindDesc;
dgInt32 index = constraintArray[curJoint].m_autoPairstart;
joindDesc.m_rowsCount = constraintArray[curJoint].m_autoPaircount;
joindDesc.m_timeStep = timeStep;
joindDesc.m_invTimeStep = invTimeStep;
joindDesc.m_firstPassCoefFlag = firstPassCoef;
joindDesc.m_Jt = &Jt[index];
joindDesc.m_penetration = &penetration[index];
joindDesc.m_restitution = &restitution[index];
joindDesc.m_accelIsMotor = &accelIsMortor[index];
joindDesc.m_externAccelaration = &externAccel[index];
joindDesc.m_coordenateAccel = &coordenateAccel[index];
joindDesc.m_normalForceIndex = &normalForceIndex[index];
joindDesc.m_penetrationStiffness = &penetrationStiffness[index];
constraintArray[curJoint].m_joint->JointAccelerationsSimd(joindDesc);
}
firstPassCoef = dgFloat32(1.0f);
dgFloat32 accNorm;
accNorm = maxAccNorm * dgFloat32(2.0f);
for (dgInt32 passes = 0; (passes < maxPasses) && (accNorm > maxAccNorm);
passes++) {
simd_type accNormSimd = zero;
for (dgInt32 curJoint = 0; curJoint < m_jointCount; curJoint++) {
dgInt32 index = constraintArray[curJoint].m_autoPairstart;
dgInt32 rowsCount = constraintArray[curJoint].m_autoPaircount;
dgInt32 m0 = constraintArray[curJoint].m_m0;
dgInt32 m1 = constraintArray[curJoint].m_m1;
simd_type linearM0 = (simd_type &)internalForces[m0].m_linear;
simd_type angularM0 = (simd_type &)internalForces[m0].m_angular;
simd_type linearM1 = (simd_type &)internalForces[m1].m_linear;
simd_type angularM1 = (simd_type &)internalForces[m1].m_angular;
for (dgInt32 k = 0; k < rowsCount; k++) {
// dgVector acc (m_JMinv[index].m_jacobian_IM0.m_linear.CompProduct(linearM0));
// acc += m_JMinv[index].m_jacobian_IM0.m_angular.CompProduct (angularM0);
// acc += m_JMinv[index].m_jacobian_IM1.m_linear.CompProduct (linearM1);
// acc += m_JMinv[index].m_jacobian_IM1.m_angular.CompProduct (angularM1);
simd_type a =
simd_mul_v((simd_type &)JMinv[index].m_jacobian_IM0.m_linear, linearM0);
a =
simd_mul_add_v(a, (simd_type &)JMinv[index].m_jacobian_IM0.m_angular, angularM0);
a =
simd_mul_add_v(a, (simd_type &)JMinv[index].m_jacobian_IM1.m_linear, linearM1);
a =
simd_mul_add_v(a, (simd_type &)JMinv[index].m_jacobian_IM1.m_angular, angularM1);
// a = coordenateAccel[index] - acc.m_x - acc.m_y - acc.m_z - force[index] * diagDamp[index];
a = simd_add_v(a, simd_move_hl_v(a, a));
a = simd_add_s(a, simd_permut_v(a, a, PURMUT_MASK(3, 3, 3, 1)));
a =
simd_sub_s(simd_load_s(coordenateAccel[index]), simd_mul_add_s(a, simd_load_s(force[index]), simd_load_s(diagDamp[index])));
// f = force[index] + invDJMinvJt[index] * a;
simd_type f =
simd_mul_add_s(simd_load_s(force[index]), simd_load_s(invDJMinvJt[index]), a);
dgInt32 frictionIndex = m_normalForceIndex[index];
NEWTON_ASSERT(
((frictionIndex < 0) && (force[frictionIndex] == dgFloat32(1.0f))) || ((frictionIndex >= 0) && (force[frictionIndex] >= dgFloat32(0.0f))));
// frictionNormal = force[frictionIndex];
// lowerFrictionForce = frictionNormal * lowerFrictionCoef[index];
// upperFrictionForce = frictionNormal * upperFrictionCoef[index];
simd_type frictionNormal = simd_load_s(force[frictionIndex]);
simd_type lowerFrictionForce =
simd_mul_s(frictionNormal, simd_load_s(lowerFrictionCoef[index]));
simd_type upperFrictionForce =
simd_mul_s(frictionNormal, simd_load_s(upperFrictionCoef[index]));
// if (f > upperFrictionForce) {
// a = dgFloat32 (0.0f);
// f = upperFrictionForce;
// } else if (f < lowerFrictionForce) {
// a = dgFloat32 (0.0f);
// f = lowerFrictionForce;
// }
f =
simd_min_s(simd_max_s(f, lowerFrictionForce), upperFrictionForce);
a =
simd_andnot_v(a, simd_or_v(simd_cmplt_s(f, lowerFrictionForce), simd_cmpgt_s(f, upperFrictionForce)));
accNormSimd = simd_max_s(accNormSimd, simd_and_v(a, signMask));
// prevValue = f - force[index]);
a = simd_sub_s(f, simd_load_s(force[index]));
a = simd_permut_v(a, a, PURMUT_MASK(0, 0, 0, 0));
// force[index] = f;
simd_store_s(f, &force[index]);
linearM0 =
simd_mul_add_v(linearM0, (simd_type &)Jt[index].m_jacobian_IM0.m_linear, a);
angularM0 =
simd_mul_add_v(angularM0, (simd_type &)Jt[index].m_jacobian_IM0.m_angular, a);
linearM1 =
simd_mul_add_v(linearM1, (simd_type &)Jt[index].m_jacobian_IM1.m_linear, a);
angularM1 =
simd_mul_add_v(angularM1, (simd_type &)Jt[index].m_jacobian_IM1.m_angular, a);
index++;
}
// internalForces[prevM0].m_linear += Jt[prevIndex].m_jacobian_IM0.m_linear.Scale (prevValue);
// internalForces[prevM0].m_angular += Jt[prevIndex].m_jacobian_IM0.m_angular.Scale (prevValue);
// internalForces[prevM1].m_linear += Jt[prevIndex].m_jacobian_IM1.m_linear.Scale (prevValue);
// internalForces[prevM1].m_angular += Jt[prevIndex].m_jacobian_IM1.m_angular.Scale (prevValue);
(simd_type &)internalForces[m0].m_linear = linearM0;
(simd_type &)internalForces[m0].m_angular = angularM0;
(simd_type &)internalForces[m1].m_linear = linearM1;
(simd_type &)internalForces[m1].m_angular = angularM1;
}
simd_store_s(accNormSimd, &accNorm);
}
for (dgInt32 i = 1; i < m_bodyCount; i++) {
dgBody *const body = bodyArray[i].m_body;
// dgVector force (body->m_accel + internalForces[i].m_linear);
// dgVector torque (body->m_alpha + internalForces[i].m_angular);
simd_type force =
simd_add_v((simd_type &)body->m_accel, (simd_type &)internalForces[i].m_linear);
simd_type torque =
simd_add_v((simd_type &)body->m_alpha, (simd_type &)internalForces[i].m_angular);
// dgVector accel (force.Scale (body->m_invMass.m_w));
simd_type accel = simd_mul_v(force, simd_set1(body->m_invMass.m_w));
// dgVector alpha (body->m_invWorldInertiaMatrix.RotateVector (torque));
simd_type alpha =
simd_mul_add_v(simd_mul_add_v(simd_mul_v((simd_type &)body->m_invWorldInertiaMatrix[0], simd_permut_v(torque, torque, PURMUT_MASK(0, 0, 0, 0))),
(simd_type &)body->m_invWorldInertiaMatrix[1], simd_permut_v(torque, torque, PURMUT_MASK(1, 1, 1, 1))),
(simd_type &)body->m_invWorldInertiaMatrix[2], simd_permut_v(torque, torque, PURMUT_MASK(2, 2, 2, 2)));
// body->m_veloc += accel.Scale(timeStep);
(simd_type &)body->m_veloc =
simd_mul_add_v((simd_type &)body->m_veloc, accel, timeStepSimd);
// body->m_omega += alpha.Scale(timeStep);
(simd_type &)body->m_omega =
simd_mul_add_v((simd_type &)body->m_omega, alpha, timeStepSimd);
// body->m_netForce += body->m_veloc;
(simd_type &)internalVeloc[i].m_linear =
simd_add_v((simd_type &)internalVeloc[i].m_linear, (simd_type &)body->m_veloc);
// body->m_netTorque += body->m_omega;
(simd_type &)internalVeloc[i].m_angular =
simd_add_v((simd_type &)internalVeloc[i].m_angular, (simd_type &)body->m_omega);
}
}
dgInt32 hasJointFeeback = 0;
for (dgInt32 i = 0; i < m_jointCount; i++) {
// maxForce = dgFloat32 (0.0f);
dgInt32 first = constraintArray[i].m_autoPairstart;
dgInt32 count = constraintArray[i].m_autoPaircount;
for (dgInt32 j = 0; j < count; j++) {
dgInt32 index = j + first;
dgFloat32 val = force[index];
NEWTON_ASSERT(dgCheckFloat(val));
// maxForce = GetMax (dgAbsf (val), maxForce);
jointForceFeeback[index][0] = val;
}
// if (constraintArray[i].m_joint->GetId() == dgContactConstraintId) {
// m_world->AddToBreakQueue ((dgContact*)constraintArray[i].m_joint, maxForce);
// }
// hasJointFeeback |= dgUnsigned32 (constraintArray[i].m_joint->m_updaFeedbackCallback);
hasJointFeeback |= (constraintArray[i].m_joint->m_updaFeedbackCallback ? 1 : 0);
// if (constraintArray[i].m_joint->m_updaFeedbackCallback) {
// constraintArray[i].m_joint->m_updaFeedbackCallback (*constraintArray[i].m_joint, m_timeStep, m_threadIndex);
// }
}
// simd_type invStepSimd;
// signMask = simd_set1 (invStep);
#ifdef DG_WIGHT_FINAL_RK4_DERIVATIVES
simd_type invStepSimd = simd_set1(invStep);
#endif
simd_type invTimeStepSimd = simd_set1(m_invTimeStep);
simd_type accelerationTolerance = simd_set1(maxAccNorm);
accelerationTolerance =
simd_mul_s(accelerationTolerance, accelerationTolerance);
for (dgInt32 i = 1; i < m_bodyCount; i++) {
dgBody *const body = bodyArray[i].m_body;
#ifdef DG_WIGHT_FINAL_RK4_DERIVATIVES
// body->m_veloc = internalVeloc[i].m_linear.Scale(invStep);
// body->m_omega = internalVeloc[i].m_angular.Scale(invStep);
(simd_type &)body->m_veloc = simd_mul_v((simd_type &)internalVeloc[i].m_linear, invStepSimd);
(simd_type &)body->m_omega = simd_mul_v((simd_type &)internalVeloc[i].m_angular, invStepSimd);
#endif
// dgVector accel = (body->m_veloc - body->m_netForce).Scale (m_invTimeStep);
// dgVector alpha = (body->m_omega - body->m_netTorque).Scale (m_invTimeStep);
simd_type accel = simd_mul_v(simd_sub_v((simd_type &)body->m_veloc, (simd_type &)body->m_netForce), invTimeStepSimd);
simd_type alpha = simd_mul_v(simd_sub_v((simd_type &)body->m_omega, (simd_type &)body->m_netTorque), invTimeStepSimd);
// if ((accel % accel) < maxAccNorm2) {
// accel = zero;
// }
// body->m_accel = accel;
// body->m_netForce = accel.Scale (body->m_mass[3]);
simd_type tmp = simd_mul_v(accel, accel);
tmp = simd_add_v(tmp, simd_move_hl_v(tmp, tmp));
tmp = simd_add_s(tmp, simd_permut_v(tmp, tmp, PURMUT_MASK(0, 0, 0, 1)));
tmp = simd_cmplt_s(tmp, accelerationTolerance);
(simd_type &)body->m_accel =
simd_andnot_v(accel, simd_permut_v(tmp, tmp, PURMUT_MASK(0, 0, 0, 0)));
(simd_type &)body->m_netForce =
simd_mul_v((simd_type &)body->m_accel, simd_set1(body->m_mass[3]));
// if ((alpha % alpha) < maxAccNorm2) {
// alpha = zero;
// }
// body->m_alpha = alpha;
tmp = simd_mul_v(alpha, alpha);
tmp = simd_add_v(tmp, simd_move_hl_v(tmp, tmp));
tmp = simd_add_s(tmp, simd_permut_v(tmp, tmp, PURMUT_MASK(0, 0, 0, 1)));
tmp = simd_cmplt_s(tmp, accelerationTolerance);
(simd_type &)body->m_alpha =
simd_andnot_v(alpha, simd_permut_v(tmp, tmp, PURMUT_MASK(0, 0, 0, 0)));
// alpha = body->m_matrix.UnrotateVector(alpha);
alpha =
simd_mul_v((simd_type &)body->m_matrix[0], (simd_type &)body->m_alpha);
alpha = simd_add_v(alpha, simd_move_hl_v(alpha, alpha));
alpha =
simd_add_s(alpha, simd_permut_v(alpha, alpha, PURMUT_MASK(0, 0, 0, 1)));
tmp = simd_mul_v((simd_type &)body->m_matrix[1], (simd_type &)body->m_alpha);
tmp = simd_add_v(tmp, simd_move_hl_v(tmp, tmp));
tmp = simd_add_s(tmp, simd_permut_v(tmp, tmp, PURMUT_MASK(0, 0, 0, 1)));
alpha = simd_pack_lo_v(alpha, tmp);
tmp = simd_mul_v((simd_type &)body->m_matrix[2], (simd_type &)body->m_alpha);
tmp = simd_add_v(tmp, simd_move_hl_v(tmp, tmp));
tmp = simd_add_s(tmp, simd_permut_v(tmp, tmp, PURMUT_MASK(0, 0, 0, 1)));
alpha = simd_permut_v(alpha, tmp, PURMUT_MASK(3, 0, 1, 0));
// body->m_netTorque = body->m_matrix.RotateVector (alpha.CompProduct(body->m_mass));
alpha = simd_mul_v(alpha, (simd_type &)body->m_mass);
(simd_type &)body->m_netTorque =
simd_mul_add_v(simd_mul_add_v(simd_mul_v((simd_type &)body->m_matrix[0], simd_permut_v(alpha, alpha, PURMUT_MASK(0, 0, 0, 0))),
(simd_type &)body->m_matrix[1], simd_permut_v(alpha, alpha, PURMUT_MASK(1, 1, 1, 1))),
(simd_type &)body->m_matrix[2], simd_permut_v(alpha, alpha, PURMUT_MASK(2, 2, 2, 2)));
}
if (hasJointFeeback) {
for (dgInt32 i = 0; i < m_jointCount; i++) {
if (constraintArray[i].m_joint->m_updaFeedbackCallback) {
constraintArray[i].m_joint->m_updaFeedbackCallback(
*constraintArray[i].m_joint, m_timeStep, m_threadIndex);
}
}
}
#endif
}
void dgJacobianMemory::CalculateForcesGameMode(dgInt32 iterations,
dgFloat32 maxAccNorm) const {
dgFloat32 *const force = m_force;
const dgJacobianPair *const Jt = m_Jt;
const dgJacobianPair *const JMinv = m_JMinv;
const dgFloat32 *const diagDamp = m_diagDamp;
const dgFloat32 *const invDJMinvJt = m_invDJMinvJt;
const dgBodyInfo *bodyArray = m_bodyArray;
dgFloat32 *const penetration = m_penetration;
const dgFloat32 *const externAccel = m_deltaAccel;
const dgFloat32 *const restitution = m_restitution;
dgFloat32 *const coordenateAccel = m_coordenateAccel;
dgJacobian *const internalVeloc = m_internalVeloc;
dgJacobian *const internalForces = m_internalForces;
;
dgFloat32 **const jointForceFeeback = m_jointFeebackForce;
const dgInt32 *const accelIsMortor = m_accelIsMotor;
const dgInt32 *const normalForceIndex = m_normalForceIndex;
const dgJointInfo *const constraintArray = m_constraintArray;
const dgFloat32 *const penetrationStiffness = m_penetrationStiffness;
const dgFloat32 *const lowerFrictionCoef = m_lowerBoundFrictionCoefficent;
const dgFloat32 *const upperFrictionCoef = m_upperBoundFrictionCoefficent;
dgVector zero(dgFloat32(0.0f), dgFloat32(0.0f), dgFloat32(0.0f),
dgFloat32(0.0f));
// steps = 4;
dgFloat32 invStep = (dgFloat32(1.0f) / dgFloat32(LINEAR_SOLVER_SUB_STEPS));
dgFloat32 timeStep = m_timeStep * invStep;
dgFloat32 invTimeStep = m_invTimeStep * dgFloat32(LINEAR_SOLVER_SUB_STEPS);
NEWTON_ASSERT(m_bodyArray[0].m_body == m_world->m_sentionelBody);
for (dgInt32 i = 1; i < m_bodyCount; i++) {
dgBody *const body = m_bodyArray[i].m_body;
body->m_netForce = body->m_veloc;
body->m_netTorque = body->m_omega;
internalVeloc[i].m_linear = zero;
internalVeloc[i].m_angular = zero;
internalForces[i].m_linear = zero;
internalForces[i].m_angular = zero;
}
internalVeloc[0].m_linear = zero;
internalVeloc[0].m_angular = zero;
internalForces[0].m_linear = zero;
internalForces[0].m_angular = zero;
for (dgInt32 i = 0; i < m_jointCount; i++) {
dgJacobian y0;
dgJacobian y1;
dgInt32 first = constraintArray[i].m_autoPairstart;
dgInt32 count = constraintArray[i].m_autoPaircount;
dgInt32 m0 = constraintArray[i].m_m0;
dgInt32 m1 = constraintArray[i].m_m1;
y0.m_linear = zero;
y0.m_angular = zero;
y1.m_linear = zero;
y1.m_angular = zero;
for (dgInt32 j = 0; j < count; j++) {
dgInt32 index = j + first;
dgFloat32 val = force[index];
NEWTON_ASSERT(dgCheckFloat(val));
y0.m_linear += Jt[index].m_jacobian_IM0.m_linear.Scale(val);
y0.m_angular += Jt[index].m_jacobian_IM0.m_angular.Scale(val);
y1.m_linear += Jt[index].m_jacobian_IM1.m_linear.Scale(val);
y1.m_angular += Jt[index].m_jacobian_IM1.m_angular.Scale(val);
}
internalForces[m0].m_linear += y0.m_linear;
internalForces[m0].m_angular += y0.m_angular;
internalForces[m1].m_linear += y1.m_linear;
internalForces[m1].m_angular += y1.m_angular;
}
// if (sleepCount) {
// for (dgInt32 i = 1; i < m_bodyCount; i ++) {
// dgBody* body;
// body = m_bodyArray[i].m_body;
// if (body->m_equilibrium)
// body = m_bodyArray[i].m_body;
// }
// }
dgFloat32 firstPassCoef = dgFloat32(0.0f);
dgInt32 maxPasses = iterations + DG_BASE_ITERATION_COUNT;
for (dgInt32 step = 0; step < LINEAR_SOLVER_SUB_STEPS; step++) {
for (dgInt32 curJoint = 0; curJoint < m_jointCount; curJoint++) {
dgJointAccelerationDecriptor joindDesc;
dgInt32 index = constraintArray[curJoint].m_autoPairstart;
joindDesc.m_rowsCount = constraintArray[curJoint].m_autoPaircount;
joindDesc.m_timeStep = timeStep;
joindDesc.m_invTimeStep = invTimeStep;
joindDesc.m_firstPassCoefFlag = firstPassCoef;
joindDesc.m_Jt = &Jt[index];
joindDesc.m_penetration = &penetration[index];
joindDesc.m_restitution = &restitution[index];
joindDesc.m_externAccelaration = &externAccel[index];
joindDesc.m_coordenateAccel = &coordenateAccel[index];
joindDesc.m_accelIsMotor = &accelIsMortor[index];
joindDesc.m_normalForceIndex = &normalForceIndex[index];
joindDesc.m_penetrationStiffness = &penetrationStiffness[index];
constraintArray[curJoint].m_joint->JointAccelerations(joindDesc);
}
firstPassCoef = dgFloat32(1.0f);
dgFloat32 accNorm = maxAccNorm * dgFloat32(2.0f);
for (dgInt32 passes = 0; (passes < maxPasses) && (accNorm > maxAccNorm);
passes++) {
accNorm = dgFloat32(0.0f);
for (dgInt32 curJoint = 0; curJoint < m_jointCount; curJoint++) {
dgInt32 index = constraintArray[curJoint].m_autoPairstart;
dgInt32 rowsCount = constraintArray[curJoint].m_autoPaircount;
dgInt32 m0 = constraintArray[curJoint].m_m0;
dgInt32 m1 = constraintArray[curJoint].m_m1;
dgVector linearM0(internalForces[m0].m_linear);
dgVector angularM0(internalForces[m0].m_angular);
dgVector linearM1(internalForces[m1].m_linear);
dgVector angularM1(internalForces[m1].m_angular);
for (dgInt32 k = 0; k < rowsCount; k++) {
dgVector acc(
JMinv[index].m_jacobian_IM0.m_linear.CompProduct(linearM0));
acc += JMinv[index].m_jacobian_IM0.m_angular.CompProduct(angularM0);
acc += JMinv[index].m_jacobian_IM1.m_linear.CompProduct(linearM1);
acc += JMinv[index].m_jacobian_IM1.m_angular.CompProduct(angularM1);
dgFloat32 a = coordenateAccel[index] - acc.m_x - acc.m_y - acc.m_z - force[index] * diagDamp[index];
dgFloat32 f = force[index] + invDJMinvJt[index] * a;
dgInt32 frictionIndex = normalForceIndex[index];
NEWTON_ASSERT(
((frictionIndex < 0) && (force[frictionIndex] == dgFloat32(1.0f))) || ((frictionIndex >= 0) && (force[frictionIndex] >= dgFloat32(0.0f))));
dgFloat32 frictionNormal = force[frictionIndex];
dgFloat32 lowerFrictionForce = frictionNormal * lowerFrictionCoef[index];
dgFloat32 upperFrictionForce = frictionNormal * upperFrictionCoef[index];
if (f > upperFrictionForce) {
a = dgFloat32(0.0f);
f = upperFrictionForce;
} else if (f < lowerFrictionForce) {
a = dgFloat32(0.0f);
f = lowerFrictionForce;
}
accNorm = GetMax(accNorm, dgAbsf(a));
dgFloat32 prevValue = f - force[index];
force[index] = f;
linearM0 += Jt[index].m_jacobian_IM0.m_linear.Scale(prevValue);
angularM0 += Jt[index].m_jacobian_IM0.m_angular.Scale(prevValue);
linearM1 += Jt[index].m_jacobian_IM1.m_linear.Scale(prevValue);
angularM1 += Jt[index].m_jacobian_IM1.m_angular.Scale(prevValue);
index++;
}
internalForces[m0].m_linear = linearM0;
internalForces[m0].m_angular = angularM0;
internalForces[m1].m_linear = linearM1;
internalForces[m1].m_angular = angularM1;
}
// accNorm = 1.0f;
}
for (dgInt32 i = 1; i < m_bodyCount; i++) {
dgBody *const body = bodyArray[i].m_body;
dgVector forceTmp(body->m_accel + internalForces[i].m_linear);
dgVector torque(body->m_alpha + internalForces[i].m_angular);
dgVector accel(forceTmp.Scale(body->m_invMass.m_w));
dgVector alpha(body->m_invWorldInertiaMatrix.RotateVector(torque));
body->m_veloc += accel.Scale(timeStep);
body->m_omega += alpha.Scale(timeStep);
internalVeloc[i].m_linear += body->m_veloc;
internalVeloc[i].m_angular += body->m_omega;
}
}
dgInt32 hasJointFeeback = 0;
for (dgInt32 i = 0; i < m_jointCount; i++) {
dgInt32 first = constraintArray[i].m_autoPairstart;
dgInt32 count = constraintArray[i].m_autoPaircount;
// maxForce = dgFloat32 (0.0f);
for (dgInt32 j = 0; j < count; j++) {
dgInt32 index = j + first;
dgFloat32 val = force[index];
// maxForce = GetMax (dgAbsf (val), maxForce);
NEWTON_ASSERT(dgCheckFloat(val));
jointForceFeeback[index][0] = val;
}
// if (constraintArray[i].m_joint->GetId() == dgContactConstraintId) {
// m_world->AddToBreakQueue ((dgContact*)constraintArray[i].m_joint, maxForce);
// }
hasJointFeeback |= (constraintArray[i].m_joint->m_updaFeedbackCallback ? 1 : 0);
// if (constraintArray[i].m_joint->m_updaFeedbackCallback) {
// constraintArray[i].m_joint->m_updaFeedbackCallback (*constraintArray[i].m_joint, m_timeStep, m_threadIndex);
// }
}
dgFloat32 maxAccNorm2 = maxAccNorm * maxAccNorm;
for (dgInt32 i = 1; i < m_bodyCount; i++) {
dgBody *const body = bodyArray[i].m_body;
#ifdef DG_WIGHT_FINAL_RK4_DERIVATIVES
body->m_veloc = internalVeloc[i].m_linear.Scale(invStep);
body->m_omega = internalVeloc[i].m_angular.Scale(invStep);
#endif
dgVector accel = (body->m_veloc - body->m_netForce).Scale(m_invTimeStep);
dgVector alpha = (body->m_omega - body->m_netTorque).Scale(m_invTimeStep);
if ((accel % accel) < maxAccNorm2) {
accel = zero;
}
if ((alpha % alpha) < maxAccNorm2) {
alpha = zero;
}
body->m_accel = accel;
body->m_alpha = alpha;
body->m_netForce = accel.Scale(body->m_mass[3]);
alpha = body->m_matrix.UnrotateVector(alpha);
body->m_netTorque = body->m_matrix.RotateVector(
alpha.CompProduct(body->m_mass));
}
if (hasJointFeeback) {
for (dgInt32 i = 0; i < m_jointCount; i++) {
if (constraintArray[i].m_joint->m_updaFeedbackCallback) {
constraintArray[i].m_joint->m_updaFeedbackCallback(
reinterpret_cast<const NewtonJoint *>(constraintArray[i].m_joint), m_timeStep, m_threadIndex);
}
}
}
}
////////////////////////////////////////////////////////////////////////////////////////
//
// Parallel solver
//
////////////////////////////////////////////////////////////////////////////////////////
void dgParallelSolverBodyInertia::ThreadExecute() {
// dgUnsigned32 ticks;
// ticks = m_world->m_getPerformanceCount();
if (m_useSimd) {
for (dgInt32 j = 0; j < m_count; j++) {
dgBody *body;
body = m_bodyArray[j].m_body;
NEWTON_ASSERT(body->m_invMass.m_w > dgFloat32(0.0f));
body->AddDamingAcceleration();
body->CalcInvInertiaMatrixSimd();
}
} else {
for (dgInt32 j = 0; j < m_count; j++) {
dgBody *body;
body = m_bodyArray[j].m_body;
NEWTON_ASSERT(body->m_invMass.m_w > dgFloat32(0.0f));
body->AddDamingAcceleration();
body->CalcInvInertiaMatrix();
}
}
// m_world->m_perfomanceCounters[m_threadIndex][m_dynamicsSolveSpanningTreeTicks] += (m_world->m_getPerformanceCount() - ticks);
}
void dgParallelSolverBuildJacobianRows::ThreadExecute() {
// dgUnsigned32 ticks;
// ticks = m_world->m_getPerformanceCount();
dgFloat32 *const force = m_force;
const dgJacobianPair *const Jt = m_Jt;
dgJacobianPair *const JMinv = m_JMinv;
dgFloat32 *const diagDamp = m_diagDamp;
dgFloat32 *const extAccel = m_deltaAccel;
dgFloat32 *const invDJMinvJt = m_invDJMinvJt;
dgFloat32 *const coordenateAccel = m_coordenateAccel;
dgFloat32 **const jointForceFeeback = m_jointFeebackForce;
// dgInt32* const accelIsMotor = solverMemory.m_accelIsMotor;
dgBodyInfo *const bodyArray = m_bodyArray;
dgJointInfo *const constraintArray = m_constraintArray;
if (m_useSimd) {
#ifdef DG_BUILD_SIMD_CODE
simd_type zero;
zero = simd_set1(dgFloat32(0.0f));
for (dgInt32 k = 0; k < m_count; k++) {
dgInt32 m0;
dgInt32 m1;
dgInt32 index;
dgInt32 count;
dgFloat32 diag;
dgFloat32 stiffness;
simd_type tmp0;
simd_type tmp1;
simd_type invMass0;
simd_type invMass1;
simd_type tmpDiag;
simd_type tmpAccel;
dgBody *body0;
dgBody *body1;
index = constraintArray[k].m_autoPairstart;
count = constraintArray[k].m_autoPaircount;
m0 = constraintArray[k].m_m0;
m1 = constraintArray[k].m_m1;
NEWTON_ASSERT(m0 >= 0);
NEWTON_ASSERT(m0 < m_bodyCount);
NEWTON_ASSERT(m1 >= 0);
NEWTON_ASSERT(m1 < m_bodyCount);
body0 = bodyArray[m0].m_body;
// invMass0 = body0->m_invMass[3];
invMass0 = simd_set1(body0->m_invMass[3]);
NEWTON_ASSERT((dgUnsigned64(&body0->m_invWorldInertiaMatrix) & 0x0f) == 0);
const dgMatrix &invInertia0 = body0->m_invWorldInertiaMatrix;
body1 = bodyArray[m1].m_body;
// invMass1 = body1->m_invMass[3];
invMass1 = simd_set1(body1->m_invMass[3]);
NEWTON_ASSERT((dgUnsigned64(&body1->m_invWorldInertiaMatrix) & 0x0f) == 0);
const dgMatrix &invInertia1 = body1->m_invWorldInertiaMatrix;
for (dgInt32 i = 0; i < count; i++) {
// JMinv[index].m_jacobian_IM0.m_linear = Jt[index].m_jacobian_IM0.m_linear.Scale (invMass0);
// JMinv[index].m_jacobian_IM0.m_angular = invInertia0.UnrotateVector (Jt[index].m_jacobian_IM0.m_angular);
// dgVector tmpDiag (JMinv[index].m_jacobian_IM0.m_linear.CompProduct(Jt[index].m_jacobian_IM0.m_linear));
// tmpDiag += JMinv[index].m_jacobian_IM0.m_angular.CompProduct(Jt[index].m_jacobian_IM0.m_angular);
// dgVector tmpAccel (JMinv[index].m_jacobian_IM0.m_linear.CompProduct(body0->m_accel));
// tmpAccel += JMinv[index].m_jacobian_IM0.m_angular.CompProduct(body0->m_alpha);
((simd_type &)JMinv[index].m_jacobian_IM0.m_linear) =
simd_mul_v((simd_type &)Jt[index].m_jacobian_IM0.m_linear, invMass0);
tmp0 = (simd_type &)Jt[index].m_jacobian_IM0.m_angular;
tmp1 =
simd_mul_v((simd_type &)invInertia0.m_front, simd_permut_v(tmp0, tmp0, PURMUT_MASK(3, 0, 0, 0)));
tmp1 =
simd_mul_add_v(tmp1, (simd_type &)invInertia0.m_up, simd_permut_v(tmp0, tmp0, PURMUT_MASK(3, 1, 1, 1)));
((simd_type &)JMinv[index].m_jacobian_IM0.m_angular) =
simd_mul_add_v(tmp1, (simd_type &)invInertia0.m_right, simd_permut_v(tmp0, tmp0, PURMUT_MASK(3, 2, 2, 2)));
tmpDiag =
simd_mul_v((simd_type &)JMinv[index].m_jacobian_IM0.m_linear, (simd_type &)Jt[index].m_jacobian_IM0.m_linear);
tmpDiag =
simd_mul_add_v(tmpDiag, (simd_type &)JMinv[index].m_jacobian_IM0.m_angular, (simd_type &)Jt[index].m_jacobian_IM0.m_angular);
tmpAccel =
simd_mul_v((simd_type &)JMinv[index].m_jacobian_IM0.m_linear, (simd_type &)body0->m_accel);
tmpAccel =
simd_mul_add_v(tmpAccel, (simd_type &)JMinv[index].m_jacobian_IM0.m_angular, (simd_type &)body0->m_alpha);
// JMinv[index].m_jacobian_IM1.m_linear = Jt[index].m_jacobian_IM1.m_linear.Scale (invMass1);
// JMinv[index].m_jacobian_IM1.m_angular = invInertia1.UnrotateVector (Jt[index].m_jacobian_IM1.m_angular);
// tmpDiag += JMinv[index].m_jacobian_IM1.m_linear.CompProduct(Jt[index].m_jacobian_IM1.m_linear);
// tmpDiag += JMinv[index].m_jacobian_IM1.m_angular.CompProduct(Jt[index].m_jacobian_IM1.m_angular);
// tmpAccel += JMinv[index].m_jacobian_IM1.m_linear.CompProduct(body1->m_accel);
// tmpAccel += JMinv[index].m_jacobian_IM1.m_angular.CompProduct(body1->m_alpha);
((simd_type &)JMinv[index].m_jacobian_IM1.m_linear) =
simd_mul_v((simd_type &)Jt[index].m_jacobian_IM1.m_linear, invMass1);
tmp0 = (simd_type &)Jt[index].m_jacobian_IM1.m_angular;
tmp1 =
simd_mul_v((simd_type &)invInertia1.m_front, simd_permut_v(tmp0, tmp0, PURMUT_MASK(3, 0, 0, 0)));
tmp1 =
simd_mul_add_v(tmp1, (simd_type &)invInertia1.m_up, simd_permut_v(tmp0, tmp0, PURMUT_MASK(3, 1, 1, 1)));
((simd_type &)JMinv[index].m_jacobian_IM1.m_angular) =
simd_mul_add_v(tmp1, (simd_type &)invInertia1.m_right, simd_permut_v(tmp0, tmp0, PURMUT_MASK(3, 2, 2, 2)));
tmpDiag =
simd_mul_add_v(tmpDiag, (simd_type &)JMinv[index].m_jacobian_IM1.m_linear, (simd_type &)Jt[index].m_jacobian_IM1.m_linear);
tmpDiag =
simd_mul_add_v(tmpDiag, (simd_type &)JMinv[index].m_jacobian_IM1.m_angular, (simd_type &)Jt[index].m_jacobian_IM1.m_angular);
tmpAccel =
simd_mul_add_v(tmpAccel, (simd_type &)JMinv[index].m_jacobian_IM1.m_linear, (simd_type &)body1->m_accel);
tmpAccel =
simd_mul_add_v(tmpAccel, (simd_type &)JMinv[index].m_jacobian_IM1.m_angular, (simd_type &)body1->m_alpha);
// coordenateAccel[index] -= (tmpAccel.m_x + tmpAccel.m_y + tmpAccel.m_z);
tmpAccel = simd_add_v(tmpAccel, simd_move_hl_v(tmpAccel, tmpAccel));
tmpAccel =
simd_sub_s(zero, simd_add_s(tmpAccel, simd_permut_v(tmpAccel, tmpAccel, PURMUT_MASK(3, 3, 3, 1))));
simd_store_s(tmpAccel, &extAccel[index]);
simd_store_s(simd_add_s(simd_load_s(coordenateAccel[index]), tmpAccel),
&coordenateAccel[index]);
// force[index] = bilateralForceBounds[index].m_jointForce[0];
force[index] = jointForceFeeback[index][0];
NEWTON_ASSERT(diagDamp[index] >= dgFloat32(0.1f));
NEWTON_ASSERT(diagDamp[index] <= dgFloat32(100.0f));
stiffness = DG_PSD_DAMP_TOL * diagDamp[index];
// diag = (tmpDiag.m_x + tmpDiag.m_y + tmpDiag.m_z);
tmpDiag = simd_add_v(tmpDiag, simd_move_hl_v(tmpDiag, tmpDiag));
simd_store_s(
simd_add_s(tmpDiag, simd_permut_v(tmpDiag, tmpDiag, PURMUT_MASK(3, 3, 3, 1))),
&diag);
NEWTON_ASSERT(diag > dgFloat32(0.0f));
diagDamp[index] = diag * stiffness;
diag *= (dgFloat32(1.0f) + stiffness);
invDJMinvJt[index] = dgFloat32(1.0f) / diag;
index++;
}
}
#endif
} else {
for (dgInt32 k = 0; k < m_count; k++) {
dgInt32 m0;
dgInt32 m1;
dgInt32 index;
dgInt32 count;
dgFloat32 invMass0;
dgFloat32 invMass1;
dgBody *body0;
dgBody *body1;
index = constraintArray[k].m_autoPairstart;
count = constraintArray[k].m_autoPaircount;
m0 = constraintArray[k].m_m0;
m1 = constraintArray[k].m_m1;
NEWTON_ASSERT(m0 >= 0);
NEWTON_ASSERT(m0 < m_bodyCount);
body0 = bodyArray[m0].m_body;
invMass0 = body0->m_invMass[3];
const dgMatrix &invInertia0 = body0->m_invWorldInertiaMatrix;
NEWTON_ASSERT(m1 >= 0);
NEWTON_ASSERT(m1 < m_bodyCount);
body1 = bodyArray[m1].m_body;
invMass1 = body1->m_invMass[3];
const dgMatrix &invInertia1 = body1->m_invWorldInertiaMatrix;
for (dgInt32 i = 0; i < count; i++) {
dgFloat32 diag;
dgFloat32 stiffness;
dgFloat32 extenalAcceleration;
JMinv[index].m_jacobian_IM0.m_linear =
Jt[index].m_jacobian_IM0.m_linear.Scale(invMass0);
JMinv[index].m_jacobian_IM0.m_angular = invInertia0.UnrotateVector(
Jt[index].m_jacobian_IM0.m_angular);
dgVector tmpDiag(
JMinv[index].m_jacobian_IM0.m_linear.CompProduct(
Jt[index].m_jacobian_IM0.m_linear));
tmpDiag += JMinv[index].m_jacobian_IM0.m_angular.CompProduct(
Jt[index].m_jacobian_IM0.m_angular);
dgVector tmpAccel(
JMinv[index].m_jacobian_IM0.m_linear.CompProduct(body0->m_accel));
tmpAccel += JMinv[index].m_jacobian_IM0.m_angular.CompProduct(
body0->m_alpha);
JMinv[index].m_jacobian_IM1.m_linear =
Jt[index].m_jacobian_IM1.m_linear.Scale(invMass1);
JMinv[index].m_jacobian_IM1.m_angular = invInertia1.UnrotateVector(
Jt[index].m_jacobian_IM1.m_angular);
tmpDiag += JMinv[index].m_jacobian_IM1.m_linear.CompProduct(
Jt[index].m_jacobian_IM1.m_linear);
tmpDiag += JMinv[index].m_jacobian_IM1.m_angular.CompProduct(
Jt[index].m_jacobian_IM1.m_angular);
tmpAccel += JMinv[index].m_jacobian_IM1.m_linear.CompProduct(
body1->m_accel);
tmpAccel += JMinv[index].m_jacobian_IM1.m_angular.CompProduct(
body1->m_alpha);
extenalAcceleration = -(tmpAccel.m_x + tmpAccel.m_y + tmpAccel.m_z);
extAccel[index] = extenalAcceleration;
coordenateAccel[index] += extenalAcceleration;
force[index] = jointForceFeeback[index][0];
NEWTON_ASSERT(diagDamp[index] >= dgFloat32(0.1f));
NEWTON_ASSERT(diagDamp[index] <= dgFloat32(100.0f));
stiffness = DG_PSD_DAMP_TOL * diagDamp[index];
diag = (tmpDiag.m_x + tmpDiag.m_y + tmpDiag.m_z);
NEWTON_ASSERT(diag > dgFloat32(0.0f));
diagDamp[index] = diag * stiffness;
diag *= (dgFloat32(1.0f) + stiffness);
// solverMemory.m_diagJMinvJt[index] = diag;
invDJMinvJt[index] = dgFloat32(1.0f) / diag;
index++;
}
}
}
// m_world->m_perfomanceCounters[m_threadIndex][m_dynamicsSolveSpanningTreeTicks] += (m_world->m_getPerformanceCount() - ticks);
}
void dgParallelSolverBuildJacobianMatrix::ThreadExecute() {
// dgInt32 count;
// dgInt32 rowCount;
// dgInt32 jointSolved;
dgContraintDescritor constraintParams;
dgJointInfo *const constraintArray = m_constraintArray;
// ticks = m_world->m_getPerformanceCount();
constraintParams.m_world = m_world;
constraintParams.m_threadIndex = m_threadIndex;
constraintParams.m_timestep = m_timestep;
constraintParams.m_invTimestep = m_invTimestep;
dgJacobianMemory &solverMemory = *m_solverMemory;
dgInt32 count = m_count;
dgInt32 jointSolved = m_jointSolved;
for (dgInt32 j = 0; j < count; j++) {
dgConstraint *const constraint = constraintArray[j].m_joint;
if (constraint->m_isUnilateral ^ m_bitMode) {
dgInt32 dof = dgInt32(constraint->m_maxDOF);
jointSolved++;
NEWTON_ASSERT(dof <= DG_CONSTRAINT_MAX_ROWS);
for (dgInt32 i = 0; i < dof; i++) {
constraintParams.m_forceBounds[i].m_low = DG_MIN_BOUND;
constraintParams.m_forceBounds[i].m_upper = DG_MAX_BOUND;
constraintParams.m_forceBounds[i].m_jointForce = NULL;
constraintParams.m_forceBounds[i].m_normalIndex =
DG_BILATERAL_CONSTRAINT;
}
NEWTON_ASSERT(constraint->m_body0);
NEWTON_ASSERT(constraint->m_body1);
constraint->m_body0->m_inCallback = true;
constraint->m_body1->m_inCallback = true;
dof = dgInt32(constraint->JacobianDerivative(constraintParams));
constraint->m_body0->m_inCallback = false;
constraint->m_body1->m_inCallback = false;
dgInt32 m0 =
(constraint->m_body0->m_invMass.m_w != dgFloat32(0.0f)) ? constraint->m_body0->m_index : 0;
dgInt32 m1 =
(constraint->m_body1->m_invMass.m_w != dgFloat32(0.0f)) ? constraint->m_body1->m_index : 0;
m_world->dgGetUserLock();
dgInt32 rowCount = m_rowsCount[0];
m_rowsCount[0] = rowCount + ((dof & (DG_SIMD_WORD_SIZE - 1)) ? ((dof & (-DG_SIMD_WORD_SIZE)) + DG_SIMD_WORD_SIZE) : dof);
m_world->dgReleasedUserLock();
constraintArray[j].m_autoPairstart = rowCount;
constraintArray[j].m_autoPaircount = dgInt32(dof);
constraintArray[j].m_autoPairActiveCount = dgInt32(dof);
constraintArray[j].m_m0 = m0;
constraintArray[j].m_m1 = m1;
for (dgInt32 i = 0; i < dof; i++) {
NEWTON_ASSERT(constraintParams.m_forceBounds[i].m_jointForce);
solverMemory.m_Jt[rowCount] = constraintParams.m_jacobian[i];
NEWTON_ASSERT(constraintParams.m_jointStiffness[i] >= dgFloat32(0.1f));
NEWTON_ASSERT(constraintParams.m_jointStiffness[i] <= dgFloat32(100.0f));
solverMemory.m_diagDamp[rowCount] =
constraintParams.m_jointStiffness[i];
solverMemory.m_coordenateAccel[rowCount] =
constraintParams.m_jointAccel[i];
solverMemory.m_accelIsMotor[rowCount] = dgInt32(
constraintParams.m_isMotor[i]);
solverMemory.m_restitution[rowCount] =
constraintParams.m_restitution[i];
solverMemory.m_penetration[rowCount] =
constraintParams.m_penetration[i];
solverMemory.m_penetrationStiffness[rowCount] =
constraintParams.m_penetrationStiffness[i];
solverMemory.m_lowerBoundFrictionCoefficent[rowCount] =
constraintParams.m_forceBounds[i].m_low;
solverMemory.m_upperBoundFrictionCoefficent[rowCount] =
constraintParams.m_forceBounds[i].m_upper;
solverMemory.m_jointFeebackForce[rowCount] =
constraintParams.m_forceBounds[i].m_jointForce;
solverMemory.m_normalForceIndex[rowCount] =
constraintParams.m_forceBounds[i].m_normalIndex + ((constraintParams.m_forceBounds[i].m_normalIndex >= 0) ? (rowCount - i) : 0);
rowCount++;
}
#ifdef _DEBUG
for (dgInt32 i = 0; i < ((rowCount + 3) & 0xfffc) - rowCount; i++) {
solverMemory.m_diagDamp[rowCount + i] = dgFloat32(0.0f);
solverMemory.m_coordenateAccel[rowCount + i] = dgFloat32(0.0f);
solverMemory.m_restitution[rowCount + i] = dgFloat32(0.0f);
solverMemory.m_penetration[rowCount + i] = dgFloat32(0.0f);
solverMemory.m_penetrationStiffness[rowCount + i] = dgFloat32(0.0f);
solverMemory.m_lowerBoundFrictionCoefficent[rowCount + i] = dgFloat32(
0.0f);
solverMemory.m_upperBoundFrictionCoefficent[rowCount + i] = dgFloat32(
0.0f);
solverMemory.m_jointFeebackForce[rowCount + i] = 0;
solverMemory.m_normalForceIndex[rowCount + i] = 0;
}
#endif
// rowCount = (rowCount & (DG_SIMD_WORD_SIZE - 1)) ? ((rowCount & (-DG_SIMD_WORD_SIZE)) + DG_SIMD_WORD_SIZE) : rowCount;
// NEWTON_ASSERT ((rowCount & (DG_SIMD_WORD_SIZE - 1)) == 0);
}
}
m_jointSolved = jointSolved;
// m_world->m_perfomanceCounters[m_threadIndex][m_dynamicsSolveSpanningTreeTicks] += (m_world->m_getPerformanceCount() - ticks);
}
dgInt32 dgWorldDynamicUpdate::GetJacobialDerivativesParallel(
const dgIsland &island, bool bitMode, dgInt32 rowCount, dgFloat32 timestep) {
dgInt32 chunkSizes[DG_MAXIMUN_THREADS];
dgJointInfo *const constraintArray = &m_constraintArray[island.m_jointStart];
dgInt32 acc = 0;
dgInt32 threadCounts = dgInt32(m_world->m_numberOfTheads);
dgInt32 jointCount = island.m_jointCount;
m_world->m_threadsManager.CalculateChunkSizes(jointCount, chunkSizes);
for (dgInt32 threadIndex = 0; threadIndex < threadCounts; threadIndex++) {
m_parallelSolverBuildJacobianMatrix[threadIndex].m_bitMode = bitMode;
m_parallelSolverBuildJacobianMatrix[threadIndex].m_world = m_world;
m_parallelSolverBuildJacobianMatrix[threadIndex].m_jointSolved = 0;
m_parallelSolverBuildJacobianMatrix[threadIndex].m_count =
chunkSizes[threadIndex];
m_parallelSolverBuildJacobianMatrix[threadIndex].m_timestep = timestep;
m_parallelSolverBuildJacobianMatrix[threadIndex].m_invTimestep = dgFloat32(
1.0f / timestep);
m_parallelSolverBuildJacobianMatrix[threadIndex].m_threadIndex =
threadIndex;
m_parallelSolverBuildJacobianMatrix[threadIndex].m_rowsCount = &rowCount;
m_parallelSolverBuildJacobianMatrix[threadIndex].m_constraintArray =
&constraintArray[acc];
m_parallelSolverBuildJacobianMatrix[threadIndex].m_solverMemory =
&m_solverMemory[0];
m_world->m_threadsManager.SubmitJob(
&m_parallelSolverBuildJacobianMatrix[threadIndex]);
acc += chunkSizes[threadIndex];
}
m_world->m_threadsManager.SynchronizationBarrier();
return rowCount;
}
void dgWorldDynamicUpdate::BuildJacobianMatrixParallel(const dgIsland &island,
dgFloat32 timestep, dgInt32 archModel) {
NEWTON_ASSERT(island.m_bodyCount >= 2);
//NEWTON_ASSERT (island.m_jointCount >= 1);
dgInt32 threads = dgInt32(m_world->m_numberOfTheads);
dgJacobianMemory &solverMemory = m_solverMemory[0];
dgInt32 bodyCount = island.m_bodyCount;
dgBodyInfo *const bodyArray = &m_bodyArray[island.m_bodyStart];
bodyArray[0].m_body = m_world->GetSentinelBody();
NEWTON_ASSERT(
(bodyArray[0].m_body->m_accel % bodyArray[0].m_body->m_accel) == dgFloat32(0.0f));
NEWTON_ASSERT(
(bodyArray[0].m_body->m_alpha % bodyArray[0].m_body->m_alpha) == dgFloat32(0.0f));
dgInt32 acc = 0;
dgInt32 chunkSizes[DG_MAXIMUN_THREADS];
m_world->m_threadsManager.CalculateChunkSizes(bodyCount - 1, chunkSizes);
for (dgInt32 threadIndex = 0; threadIndex < threads; threadIndex++) {
m_parallelBodyInertiaMatrix[threadIndex].m_useSimd = archModel;
m_parallelBodyInertiaMatrix[threadIndex].m_bodyArray = &bodyArray[acc + 1];
m_parallelBodyInertiaMatrix[threadIndex].m_threadIndex = threadIndex;
m_parallelBodyInertiaMatrix[threadIndex].m_world = m_world;
m_parallelBodyInertiaMatrix[threadIndex].m_count = chunkSizes[threadIndex];
m_world->m_threadsManager.SubmitJob(
&m_parallelBodyInertiaMatrix[threadIndex]);
acc += chunkSizes[threadIndex];
}
m_world->m_threadsManager.SynchronizationBarrier();
while (bodyCount > solverMemory.m_maxBodiesCount) {
m_world->dgGetUserLock();
ReallocIntenalForcesMemory(bodyCount, 0);
m_world->dgReleasedUserLock();
}
dgInt32 jointCount = island.m_jointCount;
dgJointInfo *const constraintArray = &m_constraintArray[island.m_jointStart];
solverMemory.m_constraintArray = constraintArray;
dgInt32 maxRowCount = 0;
for (dgInt32 j = 0; j < jointCount; j++) {
dgConstraint *const constraint = constraintArray[j].m_joint;
;
dgInt32 dof =
dgInt32(
(constraint->m_maxDOF & (DG_SIMD_WORD_SIZE - 1)) ? ((constraint->m_maxDOF & (-DG_SIMD_WORD_SIZE)) + DG_SIMD_WORD_SIZE) : constraint->m_maxDOF);
maxRowCount += dof;
}
while (maxRowCount > solverMemory.m_maxJacobiansCount) {
m_world->dgGetUserLock();
ReallocJacobiansMemory(solverMemory.m_maxJacobiansCount * 2, 0);
m_world->dgReleasedUserLock();
}
dgInt32 rowCount = 0;
if (island.m_hasUnilateralJoints) {
rowCount = GetJacobialDerivativesParallel(island, false, rowCount,
timestep);
}
rowCount = GetJacobialDerivativesParallel(island, true, rowCount, timestep);
solverMemory.m_rowCount = rowCount;
solverMemory.m_bodyCount = bodyCount;
solverMemory.m_bodyArray = bodyArray;
solverMemory.m_jointCount = jointCount;
solverMemory.m_timeStep = timestep;
solverMemory.m_invTimeStep = dgFloat32(1.0f) / solverMemory.m_timeStep;
acc = 0;
m_world->m_threadsManager.CalculateChunkSizes(jointCount, chunkSizes);
for (dgInt32 threadIndex = 0; threadIndex < threads; threadIndex++) {
m_parallelSolverBuildJacobianRows[threadIndex].m_useSimd = archModel;
m_parallelSolverBuildJacobianRows[threadIndex].m_bodyCount = bodyCount;
m_parallelSolverBuildJacobianRows[threadIndex].m_bodyArray = bodyArray;
m_parallelSolverBuildJacobianRows[threadIndex].m_threadIndex = threadIndex;
m_parallelSolverBuildJacobianRows[threadIndex].m_world = m_world;
m_parallelSolverBuildJacobianRows[threadIndex].m_count =
chunkSizes[threadIndex];
m_parallelSolverBuildJacobianRows[threadIndex].m_force =
solverMemory.m_force;
m_parallelSolverBuildJacobianRows[threadIndex].m_Jt = solverMemory.m_Jt;
m_parallelSolverBuildJacobianRows[threadIndex].m_JMinv =
solverMemory.m_JMinv;
m_parallelSolverBuildJacobianRows[threadIndex].m_diagDamp =
solverMemory.m_diagDamp;
m_parallelSolverBuildJacobianRows[threadIndex].m_deltaAccel =
solverMemory.m_deltaAccel;
m_parallelSolverBuildJacobianRows[threadIndex].m_invDJMinvJt =
solverMemory.m_invDJMinvJt;
m_parallelSolverBuildJacobianRows[threadIndex].m_coordenateAccel =
solverMemory.m_coordenateAccel;
m_parallelSolverBuildJacobianRows[threadIndex].m_jointFeebackForce =
solverMemory.m_jointFeebackForce;
m_parallelSolverBuildJacobianRows[threadIndex].m_constraintArray =
&constraintArray[acc];
m_world->m_threadsManager.SubmitJob(
&m_parallelSolverBuildJacobianRows[threadIndex]);
acc += chunkSizes[threadIndex];
}
m_world->m_threadsManager.SynchronizationBarrier();
}
void dgParallelSolverClear::ThreadExecute() {
if (m_useSimd) {
#ifdef DG_BUILD_SIMD_CODE
simd_type zero;
zero = simd_set1(dgFloat32(0.0f));
for (dgInt32 i = 0; i < m_count; i++) {
dgBody *body;
body = m_bodyArray[i].m_body;
((simd_type &)body->m_netForce) = zero;
((simd_type &)body->m_netTorque) = zero;
((simd_type &)m_internalVeloc[i].m_linear) = zero;
((simd_type &)m_internalVeloc[i].m_angular) = zero;
((simd_type &)m_internalForces[i].m_linear) = zero;
((simd_type &)m_internalForces[i].m_angular) = zero;
}
#endif
} else {
dgVector zero(dgFloat32(0.0f), dgFloat32(0.0f), dgFloat32(0.0f),
dgFloat32(0.0f));
for (dgInt32 i = 0; i < m_count; i++) {
dgBody *body;
body = m_bodyArray[i].m_body;
body->m_netForce = body->m_veloc;
body->m_netTorque = body->m_omega;
m_internalVeloc[i].m_linear = zero;
m_internalVeloc[i].m_angular = zero;
m_internalForces[i].m_linear = zero;
m_internalForces[i].m_angular = zero;
}
}
// m_world->m_perfomanceCounters[m_threadIndex][m_dynamicsSolveSpanningTreeTicks] += (m_world->m_getPerformanceCount() - ticks);
}
void dgParallelSolverInitInternalForces::ThreadExecute() {
// dgUnsigned32 ticks;
// ticks = m_world->m_getPerformanceCount();
if (m_useSimd) {
#ifdef DG_BUILD_SIMD_CODE
simd_type zero;
zero = simd_set1(dgFloat32(0.0f));
for (dgInt32 i = 0; i < m_count; i++) {
dgInt32 m0;
dgInt32 m1;
dgInt32 first;
dgInt32 count;
simd_type y0_linear;
simd_type y0_angular;
simd_type y1_linear;
simd_type y1_angular;
first = m_constraintArray[i].m_autoPairstart;
count = m_constraintArray[i].m_autoPairActiveCount;
m0 = m_constraintArray[i].m_m0;
m1 = m_constraintArray[i].m_m1;
// dgJacobian y0 (internalForces[k0]);
// dgJacobian y1 (internalForces[k1]);
y0_linear = zero;
y0_angular = zero;
y1_linear = zero;
y1_angular = zero;
for (dgInt32 j = 0; j < count; j++) {
dgInt32 index;
simd_type tmp0;
index = j + first;
// val = force[index];
tmp0 = simd_set1(m_force[index]);
// y0.m_linear += Jt[index].m_jacobian_IM0.m_linear.Scale (val);
// y0.m_angular += Jt[index].m_jacobian_IM0.m_angular.Scale (val);
// y1.m_linear += Jt[index].m_jacobian_IM1.m_linear.Scale (val);
// y1.m_angular += Jt[index].m_jacobian_IM1.m_angular.Scale (val);
y0_linear =
simd_mul_add_v(y0_linear, (simd_type &)m_Jt[index].m_jacobian_IM0.m_linear, tmp0);
y0_angular =
simd_mul_add_v(y0_angular, (simd_type &)m_Jt[index].m_jacobian_IM0.m_angular, tmp0);
y1_linear =
simd_mul_add_v(y1_linear, (simd_type &)m_Jt[index].m_jacobian_IM1.m_linear, tmp0);
y1_angular =
simd_mul_add_v(y1_angular, (simd_type &)m_Jt[index].m_jacobian_IM1.m_angular, tmp0);
}
// internalForces[k0] = y0;
// internalForces[k1] = y1;
// m_world->dgGetUserLock();
m_world->dgGetIndirectLock(&m_locks[m0]);
(simd_type &)m_internalForces[m0].m_linear =
simd_add_v((simd_type &)m_internalForces[m0].m_linear, y0_linear);
(simd_type &)m_internalForces[m0].m_angular =
simd_add_v((simd_type &)m_internalForces[m0].m_angular, y0_angular);
m_world->dgReleaseIndirectLock(&m_locks[m0]);
m_world->dgGetIndirectLock(&m_locks[m1]);
(simd_type &)m_internalForces[m1].m_linear =
simd_add_v((simd_type &)m_internalForces[m1].m_linear, y1_linear);
(simd_type &)m_internalForces[m1].m_angular =
simd_add_v((simd_type &)m_internalForces[m1].m_angular, y1_angular);
m_world->dgReleaseIndirectLock(&m_locks[m1]);
// m_world->dgReleasedUserLock();
}
#endif
} else {
dgVector zero(dgFloat32(0.0f), dgFloat32(0.0f), dgFloat32(0.0f),
dgFloat32(0.0f));
for (dgInt32 i = 0; i < m_count; i++) {
dgInt32 m0;
dgInt32 m1;
dgInt32 first;
dgInt32 count;
dgFloat32 val;
dgJacobian y0;
dgJacobian y1;
first = m_constraintArray[i].m_autoPairstart;
count = m_constraintArray[i].m_autoPaircount;
m0 = m_constraintArray[i].m_m0;
m1 = m_constraintArray[i].m_m1;
y0.m_linear = zero;
y0.m_angular = zero;
y1.m_linear = zero;
y1.m_angular = zero;
for (dgInt32 j = 0; j < count; j++) {
dgInt32 index;
index = j + first;
val = m_force[index];
NEWTON_ASSERT(dgCheckFloat(val));
y0.m_linear += m_Jt[index].m_jacobian_IM0.m_linear.Scale(val);
y0.m_angular += m_Jt[index].m_jacobian_IM0.m_angular.Scale(val);
y1.m_linear += m_Jt[index].m_jacobian_IM1.m_linear.Scale(val);
y1.m_angular += m_Jt[index].m_jacobian_IM1.m_angular.Scale(val);
}
m_world->dgGetIndirectLock(&m_locks[m0]);
m_internalForces[m0].m_linear += y0.m_linear;
m_internalForces[m0].m_angular += y0.m_angular;
m_world->dgReleaseIndirectLock(&m_locks[m0]);
m_world->dgGetIndirectLock(&m_locks[m1]);
m_internalForces[m1].m_linear += y1.m_linear;
m_internalForces[m1].m_angular += y1.m_angular;
m_world->dgReleaseIndirectLock(&m_locks[m1]);
}
}
// m_world->m_perfomanceCounters[m_threadIndex][m_dynamicsSolveSpanningTreeTicks] += (m_world->m_getPerformanceCount() - ticks);
}
void dgParallelSolverJointAcceleration::ThreadExecute() {
// dgUnsigned32 ticks;
// ticks = m_world->m_getPerformanceCount();
if (m_useSimd) {
dgInt32 currJoint;
currJoint = m_jointStart;
for (dgInt32 i = 0; i < m_count; i++) {
dgInt32 index;
dgJointAccelerationDecriptor joindDesc;
index = m_constraintArray[currJoint].m_autoPairstart;
joindDesc.m_rowsCount = m_constraintArray[currJoint].m_autoPaircount;
joindDesc.m_timeStep = m_timeStep;
joindDesc.m_invTimeStep = m_invTimeStep;
joindDesc.m_firstPassCoefFlag = m_firstPassCoef;
joindDesc.m_Jt = &m_Jt[index];
joindDesc.m_penetration = &m_penetration[index];
joindDesc.m_restitution = &m_restitution[index];
joindDesc.m_externAccelaration = &m_externAccel[index];
joindDesc.m_coordenateAccel = &m_coordenateAccel[index];
joindDesc.m_accelIsMotor = &m_accelIsMortor[index];
joindDesc.m_normalForceIndex = &m_normalForceIndex[index];
joindDesc.m_penetrationStiffness = &m_penetrationStiffness[index];
m_constraintArray[currJoint].m_joint->JointAccelerationsSimd(joindDesc);
currJoint++;
}
} else {
dgInt32 currJoint;
currJoint = m_jointStart;
for (dgInt32 i = 0; i < m_count; i++) {
dgInt32 index;
dgJointAccelerationDecriptor joindDesc;
index = m_constraintArray[currJoint].m_autoPairstart;
joindDesc.m_rowsCount = m_constraintArray[currJoint].m_autoPaircount;
joindDesc.m_timeStep = m_timeStep;
joindDesc.m_invTimeStep = m_invTimeStep;
joindDesc.m_firstPassCoefFlag = m_firstPassCoef;
joindDesc.m_Jt = &m_Jt[index];
joindDesc.m_penetration = &m_penetration[index];
joindDesc.m_restitution = &m_restitution[index];
joindDesc.m_externAccelaration = &m_externAccel[index];
joindDesc.m_coordenateAccel = &m_coordenateAccel[index];
joindDesc.m_accelIsMotor = &m_accelIsMortor[index];
joindDesc.m_normalForceIndex = &m_normalForceIndex[index];
joindDesc.m_penetrationStiffness = &m_penetrationStiffness[index];
m_constraintArray[currJoint].m_joint->JointAccelerations(joindDesc);
currJoint++;
}
}
m_firstPassCoef = dgFloat32(1.0f);
// m_world->m_perfomanceCounters[m_threadIndex][m_dynamicsSolveSpanningTreeTicks] += (m_world->m_getPerformanceCount() - ticks);
}
void dgParallelSolverUpdateVeloc::ThreadExecute() {
// dgUnsigned32 ticks;
// ticks = m_world->m_getPerformanceCount();
if (m_useSimd) {
#ifdef DG_BUILD_SIMD_CODE
simd_type timeStepSimd;
timeStepSimd = simd_set1(m_timeStep);
for (dgInt32 i = 0; i < m_count; i++) {
dgBody *body;
simd_type force;
simd_type torque;
simd_type accel;
simd_type alpha;
body = m_bodyArray[i].m_body;
force =
simd_add_v((simd_type &)body->m_accel, (simd_type &)m_internalForces[i].m_linear);
torque =
simd_add_v((simd_type &)body->m_alpha, (simd_type &)m_internalForces[i].m_angular);
// dgVector accel (force.Scale (body->m_invMass.m_w));
accel = simd_mul_v(force, simd_set1(body->m_invMass.m_w));
// dgVector alpha (body->m_invWorldInertiaMatrix.RotateVector (torque));
alpha =
simd_mul_add_v(simd_mul_add_v(simd_mul_v((simd_type &)body->m_invWorldInertiaMatrix[0], simd_permut_v(torque, torque, PURMUT_MASK(0, 0, 0, 0))),
(simd_type &)body->m_invWorldInertiaMatrix[1], simd_permut_v(torque, torque, PURMUT_MASK(1, 1, 1, 1))),
(simd_type &)body->m_invWorldInertiaMatrix[2], simd_permut_v(torque, torque, PURMUT_MASK(2, 2, 2, 2)));
// body->m_veloc += accel.Scale(timeStep);
(simd_type &)body->m_veloc =
simd_mul_add_v((simd_type &)body->m_veloc, accel, timeStepSimd);
// body->m_omega += alpha.Scale(timeStep);
(simd_type &)body->m_omega =
simd_mul_add_v((simd_type &)body->m_omega, alpha, timeStepSimd);
// body->m_netForce += body->m_veloc;
(simd_type &)m_internalVeloc[i].m_linear =
simd_add_v((simd_type &)m_internalVeloc[i].m_linear, (simd_type &)body->m_veloc);
// body->m_netTorque += body->m_omega;
(simd_type &)m_internalVeloc[i].m_angular =
simd_add_v((simd_type &)m_internalVeloc[i].m_angular, (simd_type &)body->m_omega);
}
#endif
} else {
for (dgInt32 i = 0; i < m_count; i++) {
dgBody *body;
body = m_bodyArray[i].m_body;
dgVector force(body->m_accel + m_internalForces[i].m_linear);
dgVector torque(body->m_alpha + m_internalForces[i].m_angular);
dgVector accel(force.Scale(body->m_invMass.m_w));
dgVector alpha(body->m_invWorldInertiaMatrix.RotateVector(torque));
body->m_veloc += accel.Scale(m_timeStep);
body->m_omega += alpha.Scale(m_timeStep);
m_internalVeloc[i].m_linear += body->m_veloc;
m_internalVeloc[i].m_angular += body->m_omega;
}
}
// m_world->m_perfomanceCounters[m_threadIndex][m_dynamicsSolveSpanningTreeTicks] += (m_world->m_getPerformanceCount() - ticks);
}
void dgParallelSolverUpdateForce::ThreadExecute() {
// dgUnsigned32 ticks;
// ticks = m_world->m_getPerformanceCount();
if (m_useSimd) {
#ifdef DG_BUILD_SIMD_CODE
simd_type invStepSimd;
simd_type invTimeStepSimd;
simd_type accelerationTolerance;
// signMask = simd_set1 (invStep);
invStepSimd = simd_set1(m_invStep);
invTimeStepSimd = simd_set1(m_invTimeStep);
accelerationTolerance = simd_set1(m_maxAccNorm2);
for (dgInt32 i = 0; i < m_count; i++) {
dgBody *body;
simd_type tmp;
simd_type accel;
simd_type alpha;
body = m_bodyArray[i].m_body;
#ifdef DG_WIGHT_FINAL_RK4_DERIVATIVES
// body->m_veloc = internalVeloc[i].m_linear.Scale(invStep);
// body->m_omega = internalVeloc[i].m_angular.Scale(invStep);
(simd_type &)body->m_veloc = simd_mul_v((simd_type &)m_internalVeloc[i].m_linear, invStepSimd);
(simd_type &)body->m_omega = simd_mul_v((simd_type &)m_internalVeloc[i].m_angular, invStepSimd);
#endif
// dgVector accel = (body->m_veloc - body->m_netForce).Scale (m_invTimeStep);
// dgVector alpha = (body->m_omega - body->m_netTorque).Scale (m_invTimeStep);
accel =
simd_mul_v(simd_sub_v((simd_type &)body->m_veloc, (simd_type &)body->m_netForce), invTimeStepSimd);
alpha =
simd_mul_v(simd_sub_v((simd_type &)body->m_omega, (simd_type &)body->m_netTorque), invTimeStepSimd);
// if ((accel % accel) < maxAccNorm2) {
// accel = zero;
// }
// body->m_accel = accel;
// body->m_netForce = accel.Scale (body->m_mass[3]);
tmp = simd_mul_v(accel, accel);
tmp = simd_add_v(tmp, simd_move_hl_v(tmp, tmp));
tmp = simd_add_s(tmp, simd_permut_v(tmp, tmp, PURMUT_MASK(0, 0, 0, 1)));
tmp = simd_cmplt_s(tmp, accelerationTolerance);
(simd_type &)body->m_accel =
simd_andnot_v(accel, simd_permut_v(tmp, tmp, PURMUT_MASK(0, 0, 0, 0)));
(simd_type &)body->m_netForce =
simd_mul_v((simd_type &)body->m_accel, simd_set1(body->m_mass[3]));
// if ((alpha % alpha) < maxAccNorm2) {
// alpha = zero;
// }
// body->m_alpha = alpha;
tmp = simd_mul_v(alpha, alpha);
tmp = simd_add_v(tmp, simd_move_hl_v(tmp, tmp));
tmp = simd_add_s(tmp, simd_permut_v(tmp, tmp, PURMUT_MASK(0, 0, 0, 1)));
tmp = simd_cmplt_s(tmp, accelerationTolerance);
(simd_type &)body->m_alpha =
simd_andnot_v(alpha, simd_permut_v(tmp, tmp, PURMUT_MASK(0, 0, 0, 0)));
// alpha = body->m_matrix.UnrotateVector(alpha);
alpha =
simd_mul_v((simd_type &)body->m_matrix[0], (simd_type &)body->m_alpha);
alpha = simd_add_v(alpha, simd_move_hl_v(alpha, alpha));
alpha =
simd_add_s(alpha, simd_permut_v(alpha, alpha, PURMUT_MASK(0, 0, 0, 1)));
tmp =
simd_mul_v((simd_type &)body->m_matrix[1], (simd_type &)body->m_alpha);
tmp = simd_add_v(tmp, simd_move_hl_v(tmp, tmp));
tmp = simd_add_s(tmp, simd_permut_v(tmp, tmp, PURMUT_MASK(0, 0, 0, 1)));
alpha = simd_pack_lo_v(alpha, tmp);
tmp =
simd_mul_v((simd_type &)body->m_matrix[2], (simd_type &)body->m_alpha);
tmp = simd_add_v(tmp, simd_move_hl_v(tmp, tmp));
tmp = simd_add_s(tmp, simd_permut_v(tmp, tmp, PURMUT_MASK(0, 0, 0, 1)));
alpha = simd_permut_v(alpha, tmp, PURMUT_MASK(3, 0, 1, 0));
// body->m_netTorque = body->m_matrix.RotateVector (alpha.CompProduct(body->m_mass));
alpha = simd_mul_v(alpha, (simd_type &)body->m_mass);
(simd_type &)body->m_netTorque =
simd_mul_add_v(simd_mul_add_v(simd_mul_v((simd_type &)body->m_matrix[0], simd_permut_v(alpha, alpha, PURMUT_MASK(0, 0, 0, 0))),
(simd_type &)body->m_matrix[1], simd_permut_v(alpha, alpha, PURMUT_MASK(1, 1, 1, 1))),
(simd_type &)body->m_matrix[2], simd_permut_v(alpha, alpha, PURMUT_MASK(2, 2, 2, 2)));
}
#endif
} else {
dgVector zero(dgFloat32(0.0f), dgFloat32(0.0f), dgFloat32(0.0f),
dgFloat32(0.0f));
for (dgInt32 i = 0; i < m_count; i++) {
dgBody *body;
body = m_bodyArray[i].m_body;
body->m_veloc = m_internalVeloc[i].m_linear.Scale(m_invStep);
body->m_omega = m_internalVeloc[i].m_angular.Scale(m_invStep);
dgVector accel = (body->m_veloc - body->m_netForce).Scale(m_invTimeStep);
dgVector alpha = (body->m_omega - body->m_netTorque).Scale(m_invTimeStep);
if ((accel % accel) < m_maxAccNorm2) {
accel = zero;
}
if ((alpha % alpha) < m_maxAccNorm2) {
alpha = zero;
}
body->m_accel = accel;
body->m_alpha = alpha;
body->m_netForce = accel.Scale(body->m_mass[3]);
alpha = body->m_matrix.UnrotateVector(alpha);
body->m_netTorque = body->m_matrix.RotateVector(
alpha.CompProduct(body->m_mass));
}
}
// m_world->m_perfomanceCounters[m_threadIndex][m_dynamicsSolveSpanningTreeTicks] += (m_world->m_getPerformanceCount() - ticks);
}
void dgParallelSolverInitFeedbackUpdate::ThreadExecute() {
// dgUnsigned32 ticks;
// ticks = m_world->m_getPerformanceCount();
for (dgInt32 i = 0; i < m_count; i++) {
dgInt32 first;
dgInt32 count;
dgInt32 index;
dgFloat32 val;
// dgFloat32 maxForce;
// maxForce = dgFloat32 (0.0f);
first = m_constraintArray[i].m_autoPairstart;
count = m_constraintArray[i].m_autoPaircount;
for (dgInt32 j = 0; j < count; j++) {
index = j + first;
val = m_force[index];
// maxForce = GetMax(dgAbsf (val), maxForce);
NEWTON_ASSERT(dgCheckFloat(val));
m_jointForceFeeback[index][0] = val;
}
m_hasJointFeeback |= (m_constraintArray[i].m_joint->m_updaFeedbackCallback ? 1 : 0);
// if (m_constraintArray[i].m_joint->m_updaFeedbackCallback) {
// m_constraintArray[i].m_joint->m_updaFeedbackCallback (*m_constraintArray[i].m_joint, m_timestep, m_threadIndex);
// }
// if (m_constraintArray[i].m_joint->GetId() == dgContactConstraintId) {
// dgInt32 m0;
// dgInt32 m1;
// m0 = m_constraintArray[i].m_m0;
// m1 = m_constraintArray[i].m_m1;
// m_world->dgGetIndirectLock(&m_locks[m0]);
// m_world->AddToBreakQueue ((dgContact*)m_constraintArray[i].m_joint, m_constraintArray[i].m_joint->m_body0, maxForce);
// m_world->dgReleaseIndirectLock(&m_locks[m0]);
// m_world->dgGetIndirectLock(&m_locks[m1]);
// m_world->AddToBreakQueue ((dgContact*)m_constraintArray[i].m_joint, m_constraintArray[i].m_joint->m_body1, maxForce);
// m_world->dgReleaseIndirectLock(&m_locks[m1]);
// }
}
// m_world->m_perfomanceCounters[m_threadIndex][m_dynamicsSolveSpanningTreeTicks] += (m_world->m_getPerformanceCount() - ticks);
}
void dgParallelSolverCalculateForces::ThreadExecute() {
// dgInt32 threadIndex;
// dgUnsigned32 ticks;
// ticks = m_world->m_getPerformanceCount();
// threadIndex = m_threadIndex;
if (m_useSimd) {
#ifdef DG_BUILD_SIMD_CODE
dgFloatSign tmpIndex;
simd_type signMask;
simd_type accNorm;
tmpIndex.m_integer.m_iVal = 0x7fffffff;
signMask = simd_set1(tmpIndex.m_fVal);
accNorm = simd_set1(dgFloat32(0.0f));
for (dgInt32 i = 0; i < m_count; i += m_threads) {
// for (dgInt32 currJoint = 0; currJoint < m_count; currJoint += m_threads) {
dgInt32 m0;
dgInt32 m1;
dgInt32 index;
// dgInt32 rowIndex;
dgInt32 currJoint;
dgInt32 rowsCount;
simd_type linearM0;
simd_type angularM0;
simd_type linearM1;
simd_type angularM1;
currJoint = m_threadIndex + i;
// rowIndex = m_jointRemapArray[i];
index = m_constraintArray[currJoint].m_autoPairstart;
rowsCount = m_constraintArray[currJoint].m_autoPaircount;
m0 = m_constraintArray[currJoint].m_m0;
m1 = m_constraintArray[currJoint].m_m1;
if (m0) {
m_world->dgGetIndirectLock(&m_locks[m0]);
}
if (m1) {
m_world->dgGetIndirectLock(&m_locks[m1]);
}
linearM0 = (simd_type &)m_internalForces[m0].m_linear;
angularM0 = (simd_type &)m_internalForces[m0].m_angular;
linearM1 = (simd_type &)m_internalForces[m1].m_linear;
angularM1 = (simd_type &)m_internalForces[m1].m_angular;
for (dgInt32 k = 0; k < rowsCount; k++) {
dgInt32 frictionIndex;
simd_type a;
simd_type f;
simd_type frictionNormal;
simd_type lowerFrictionForce;
simd_type upperFrictionForce;
// dgVector acc (m_JMinv[index].m_jacobian_IM0.m_linear.CompProduct(linearM0));
// acc += m_JMinv[index].m_jacobian_IM0.m_angular.CompProduct (angularM0);
// acc += m_JMinv[index].m_jacobian_IM1.m_linear.CompProduct (linearM1);
// acc += m_JMinv[index].m_jacobian_IM1.m_angular.CompProduct (angularM1);
a =
simd_mul_v((simd_type &)m_JMinv[index].m_jacobian_IM0.m_linear, linearM0);
a =
simd_mul_add_v(a, (simd_type &)m_JMinv[index].m_jacobian_IM0.m_angular, angularM0);
a =
simd_mul_add_v(a, (simd_type &)m_JMinv[index].m_jacobian_IM1.m_linear, linearM1);
a =
simd_mul_add_v(a, (simd_type &)m_JMinv[index].m_jacobian_IM1.m_angular, angularM1);
// a = coordenateAccel[index] - acc.m_x - acc.m_y - acc.m_z - force[index] * diagDamp[index];
a = simd_add_v(a, simd_move_hl_v(a, a));
a = simd_add_s(a, simd_permut_v(a, a, PURMUT_MASK(3, 3, 3, 1)));
a =
simd_sub_s(simd_load_s(m_coordenateAccel[index]), simd_mul_add_s(a, simd_load_s(m_force[index]), simd_load_s(m_diagDamp[index])));
// f = force[index] + invDJMinvJt[index] * a;
f =
simd_mul_add_s(simd_load_s(m_force[index]), simd_load_s(m_invDJMinvJt[index]), a);
frictionIndex = m_normalForceIndex[index];
NEWTON_ASSERT(
((frictionIndex < 0) && (m_force[frictionIndex] == dgFloat32(1.0f))) || ((frictionIndex >= 0) && (m_force[frictionIndex] >= dgFloat32(0.0f))));
// frictionNormal = force[frictionIndex];
// lowerFrictionForce = frictionNormal * lowerFrictionCoef[index];
// upperFrictionForce = frictionNormal * upperFrictionCoef[index];
frictionNormal = simd_load_s(m_force[frictionIndex]);
lowerFrictionForce =
simd_mul_s(frictionNormal, simd_load_s(m_lowerFrictionCoef[index]));
upperFrictionForce =
simd_mul_s(frictionNormal, simd_load_s(m_upperFrictionCoef[index]));
// if (f > upperFrictionForce) {
// a = dgFloat32 (0.0f);
// f = upperFrictionForce;
// } else if (f < lowerFrictionForce) {
// a = dgFloat32 (0.0f);
// f = lowerFrictionForce;
// }
// frictionNormal = simd_or_v (simd_cmplt_s (f, lowerFrictionForce), simd_cmpgt_s (f, upperFrictionForce));
f = simd_min_s(simd_max_s(f, lowerFrictionForce), upperFrictionForce);
a =
simd_andnot_v(a, simd_or_v(simd_cmplt_s(f, lowerFrictionForce), simd_cmpgt_s(f, upperFrictionForce)));
accNorm = simd_max_s(accNorm, simd_and_v(a, signMask));
// prevValue = f - force[index]);
a = simd_sub_s(f, simd_load_s(m_force[index]));
a = simd_permut_v(a, a, PURMUT_MASK(0, 0, 0, 0));
// force[index] = f;
simd_store_s(f, &m_force[index]);
linearM0 =
simd_mul_add_v(linearM0, (simd_type &)m_Jt[index].m_jacobian_IM0.m_linear, a);
angularM0 =
simd_mul_add_v(angularM0, (simd_type &)m_Jt[index].m_jacobian_IM0.m_angular, a);
linearM1 =
simd_mul_add_v(linearM1, (simd_type &)m_Jt[index].m_jacobian_IM1.m_linear, a);
angularM1 =
simd_mul_add_v(angularM1, (simd_type &)m_Jt[index].m_jacobian_IM1.m_angular, a);
index++;
}
(simd_type &)m_internalForces[m0].m_linear = linearM0;
(simd_type &)m_internalForces[m0].m_angular = angularM0;
(simd_type &)m_internalForces[m1].m_linear = linearM1;
(simd_type &)m_internalForces[m1].m_angular = angularM1;
if (m1) {
m_world->dgReleaseIndirectLock(&m_locks[m1]);
}
if (m0) {
m_world->dgReleaseIndirectLock(&m_locks[m0]);
}
}
simd_store_s(accNorm, &m_accNorm);
#endif
} else {
dgFloat32 accNorm;
accNorm = dgFloat32(0.0f);
for (dgInt32 i = 0; i < m_count; i += m_threads) {
dgInt32 m0;
dgInt32 m1;
dgInt32 index;
dgInt32 currJoint;
dgInt32 rowsCount;
currJoint = m_threadIndex + i;
index = m_constraintArray[currJoint].m_autoPairstart;
rowsCount = m_constraintArray[currJoint].m_autoPaircount;
m0 = m_constraintArray[currJoint].m_m0;
m1 = m_constraintArray[currJoint].m_m1;
if (m0) {
m_world->dgGetIndirectLock(&m_locks[m0]);
}
if (m1) {
m_world->dgGetIndirectLock(&m_locks[m1]);
}
dgVector linearM0(m_internalForces[m0].m_linear);
dgVector angularM0(m_internalForces[m0].m_angular);
dgVector linearM1(m_internalForces[m1].m_linear);
dgVector angularM1(m_internalForces[m1].m_angular);
for (dgInt32 k = 0; k < rowsCount; k++) {
dgInt32 frictionIndex;
dgFloat32 a;
dgFloat32 f;
dgFloat32 prevValue;
dgFloat32 frictionNormal;
dgFloat32 lowerFrictionForce;
dgFloat32 upperFrictionForce;
dgVector acc(
m_JMinv[index].m_jacobian_IM0.m_linear.CompProduct(linearM0));
acc += m_JMinv[index].m_jacobian_IM0.m_angular.CompProduct(angularM0);
acc += m_JMinv[index].m_jacobian_IM1.m_linear.CompProduct(linearM1);
acc += m_JMinv[index].m_jacobian_IM1.m_angular.CompProduct(angularM1);
a = m_coordenateAccel[index] - acc.m_x - acc.m_y - acc.m_z - m_force[index] * m_diagDamp[index];
f = m_force[index] + m_invDJMinvJt[index] * a;
frictionIndex = m_normalForceIndex[index];
NEWTON_ASSERT(
((frictionIndex < 0) && (m_force[frictionIndex] == dgFloat32(1.0f))) || ((frictionIndex >= 0) && (m_force[frictionIndex] >= dgFloat32(0.0f))));
frictionNormal = m_force[frictionIndex];
lowerFrictionForce = frictionNormal * m_lowerFrictionCoef[index];
upperFrictionForce = frictionNormal * m_upperFrictionCoef[index];
if (f > upperFrictionForce) {
a = dgFloat32(0.0f);
f = upperFrictionForce;
} else if (f < lowerFrictionForce) {
a = dgFloat32(0.0f);
f = lowerFrictionForce;
}
accNorm = GetMax(accNorm, dgAbsf(a));
prevValue = f - m_force[index];
m_force[index] = f;
linearM0 += m_Jt[index].m_jacobian_IM0.m_linear.Scale(prevValue);
angularM0 += m_Jt[index].m_jacobian_IM0.m_angular.Scale(prevValue);
linearM1 += m_Jt[index].m_jacobian_IM1.m_linear.Scale(prevValue);
angularM1 += m_Jt[index].m_jacobian_IM1.m_angular.Scale(prevValue);
index++;
}
m_internalForces[m0].m_linear = linearM0;
m_internalForces[m0].m_angular = angularM0;
m_internalForces[m1].m_linear = linearM1;
m_internalForces[m1].m_angular = angularM1;
if (m1) {
m_world->dgReleaseIndirectLock(&m_locks[m1]);
}
if (m0) {
m_world->dgReleaseIndirectLock(&m_locks[m0]);
}
}
m_accNorm = accNorm;
}
// m_world->m_perfomanceCounters[m_threadIndex][m_dynamicsSolveSpanningTreeTicks] += (m_world->m_getPerformanceCount() - ticks);
}
void dgJacobianMemory::CalculateForcesGameModeParallel(dgInt32 iterations,
dgFloat32 maxAccNorm, dgInt32 archModel) const {
dgFloat32 *const force = m_force;
const dgJacobianPair *const Jt = m_Jt;
const dgJacobianPair *const JMinv = m_JMinv;
const dgFloat32 *const diagDamp = m_diagDamp;
const dgFloat32 *const invDJMinvJt = m_invDJMinvJt;
const dgBodyInfo *bodyArray = m_bodyArray;
dgFloat32 *const penetration = m_penetration;
const dgFloat32 *const externAccel = m_deltaAccel;
const dgFloat32 *const restitution = m_restitution;
dgFloat32 *const coordenateAccel = m_coordenateAccel;
dgJacobian *const internalVeloc = m_internalVeloc;
dgJacobian *const internalForces = m_internalForces;
;
dgFloat32 **const jointForceFeeback = m_jointFeebackForce;
const dgInt32 *const accelIsMortor = m_accelIsMotor;
const dgInt32 *const normalForceIndex = m_normalForceIndex;
const dgJointInfo *const constraintArray = m_constraintArray;
const dgFloat32 *const penetrationStiffness = m_penetrationStiffness;
const dgFloat32 *const lowerFrictionCoef = m_lowerBoundFrictionCoefficent;
const dgFloat32 *const upperFrictionCoef = m_upperBoundFrictionCoefficent;
dgVector zero(dgFloat32(0.0f), dgFloat32(0.0f), dgFloat32(0.0f),
dgFloat32(0.0f));
dgFloat32 invStep = (dgFloat32(1.0f) / dgFloat32(LINEAR_SOLVER_SUB_STEPS));
dgFloat32 timeStep = m_timeStep * invStep;
dgFloat32 invTimeStep = m_invTimeStep * dgFloat32(LINEAR_SOLVER_SUB_STEPS);
dgInt32 threads = dgInt32(m_world->m_numberOfTheads);
dgInt32 chunkJointSizes[DG_MAXIMUN_THREADS];
dgInt32 chunkBodiesSizes[DG_MAXIMUN_THREADS];
dgInt32 acc = 0;
m_world->m_threadsManager.CalculateChunkSizes(m_bodyCount - 1,
chunkBodiesSizes);
dgParallelSolverClear *const clearAccumulators =
m_world->m_dynamicSolver.m_clearAccumulators;
for (dgInt32 threadIndex = 0; threadIndex < threads; threadIndex++) {
clearAccumulators[threadIndex].m_useSimd = archModel;
clearAccumulators[threadIndex].m_world = m_world;
clearAccumulators[threadIndex].m_threadIndex = threadIndex;
clearAccumulators[threadIndex].m_count = chunkBodiesSizes[threadIndex];
clearAccumulators[threadIndex].m_bodyArray = &bodyArray[acc + 1];
clearAccumulators[threadIndex].m_internalVeloc = &internalVeloc[acc + 1];
clearAccumulators[threadIndex].m_internalForces = &internalForces[acc + 1];
m_world->m_threadsManager.SubmitJob(&clearAccumulators[threadIndex]);
acc += chunkBodiesSizes[threadIndex];
}
m_world->m_threadsManager.SynchronizationBarrier();
internalVeloc[0].m_linear = zero;
internalVeloc[0].m_angular = zero;
internalForces[0].m_linear = zero;
internalForces[0].m_angular = zero;
acc = 0;
m_world->m_threadsManager.CalculateChunkSizes(m_jointCount, chunkJointSizes);
dgParallelSolverInitInternalForces *const parallelInitIntenalForces =
m_world->m_dynamicSolver.m_parallelInitIntenalForces;
for (dgInt32 threadIndex = 0; threadIndex < threads; threadIndex++) {
parallelInitIntenalForces[threadIndex].m_useSimd = archModel;
parallelInitIntenalForces[threadIndex].m_world = m_world;
parallelInitIntenalForces[threadIndex].m_locks = m_treadLocks;
parallelInitIntenalForces[threadIndex].m_threadIndex = threadIndex;
parallelInitIntenalForces[threadIndex].m_Jt = Jt;
parallelInitIntenalForces[threadIndex].m_force = force;
parallelInitIntenalForces[threadIndex].m_internalForces = internalForces;
parallelInitIntenalForces[threadIndex].m_count =
chunkJointSizes[threadIndex];
parallelInitIntenalForces[threadIndex].m_constraintArray =
&constraintArray[acc];
m_world->m_threadsManager.SubmitJob(
&parallelInitIntenalForces[threadIndex]);
acc += chunkJointSizes[threadIndex];
}
m_world->m_threadsManager.SynchronizationBarrier();
acc = 0;
dgParallelSolverJointAcceleration *const parallelSolverJointAcceleration =
m_world->m_dynamicSolver.m_parallelSolverJointAcceleration;
for (dgInt32 threadIndex = 0; threadIndex < threads; threadIndex++) {
parallelSolverJointAcceleration[threadIndex].m_useSimd = archModel;
parallelSolverJointAcceleration[threadIndex].m_world = m_world;
parallelSolverJointAcceleration[threadIndex].m_threadIndex = threadIndex;
parallelSolverJointAcceleration[threadIndex].m_Jt = Jt;
parallelSolverJointAcceleration[threadIndex].m_count =
chunkJointSizes[threadIndex];
parallelSolverJointAcceleration[threadIndex].m_constraintArray =
constraintArray;
parallelSolverJointAcceleration[threadIndex].m_jointStart = acc;
// parallelSolverJointAcceleration[threadIndex].m_jointRemapArray = &jointRemapArray[acc];
parallelSolverJointAcceleration[threadIndex].m_timeStep = timeStep;
parallelSolverJointAcceleration[threadIndex].m_invTimeStep = invTimeStep;
parallelSolverJointAcceleration[threadIndex].m_firstPassCoef = dgFloat32(
1.0f);
parallelSolverJointAcceleration[threadIndex].m_penetration = penetration;
parallelSolverJointAcceleration[threadIndex].m_restitution = restitution;
parallelSolverJointAcceleration[threadIndex].m_externAccel = externAccel;
parallelSolverJointAcceleration[threadIndex].m_coordenateAccel =
coordenateAccel;
parallelSolverJointAcceleration[threadIndex].m_accelIsMortor =
accelIsMortor;
parallelSolverJointAcceleration[threadIndex].m_normalForceIndex =
normalForceIndex;
parallelSolverJointAcceleration[threadIndex].m_penetrationStiffness =
penetrationStiffness;
acc += chunkJointSizes[threadIndex];
}
acc = 0;
dgParallelSolverUpdateVeloc *const parallelSolverUpdateVeloc =
m_world->m_dynamicSolver.m_parallelSolverUpdateVeloc;
for (dgInt32 threadIndex = 0; threadIndex < threads; threadIndex++) {
parallelSolverUpdateVeloc[threadIndex].m_useSimd = archModel;
parallelSolverUpdateVeloc[threadIndex].m_world = m_world;
parallelSolverUpdateVeloc[threadIndex].m_threadIndex = threadIndex;
parallelSolverUpdateVeloc[threadIndex].m_timeStep = timeStep;
parallelSolverUpdateVeloc[threadIndex].m_count =
chunkBodiesSizes[threadIndex];
parallelSolverUpdateVeloc[threadIndex].m_bodyArray = &bodyArray[acc + 1];
parallelSolverUpdateVeloc[threadIndex].m_internalVeloc = &internalVeloc[acc + 1];
parallelSolverUpdateVeloc[threadIndex].m_internalForces =
&internalForces[acc + 1];
acc += chunkBodiesSizes[threadIndex];
}
dgParallelSolverCalculateForces *const parallelSolverCalculateForces =
m_world->m_dynamicSolver.m_parallelSolverCalculateForces;
for (dgInt32 threadIndex = 0; threadIndex < threads; threadIndex++) {
parallelSolverCalculateForces[threadIndex].m_useSimd = archModel;
parallelSolverCalculateForces[threadIndex].m_world = m_world;
parallelSolverCalculateForces[threadIndex].m_threads = threads;
parallelSolverCalculateForces[threadIndex].m_threadIndex = threadIndex;
parallelSolverCalculateForces[threadIndex].m_Jt = Jt;
parallelSolverCalculateForces[threadIndex].m_JMinv = JMinv;
parallelSolverCalculateForces[threadIndex].m_count =
chunkJointSizes[threadIndex] * threads;
// parallelSolverCalculateForces[threadIndex].m_jointRemapArray = &jointRemapArray[threadIndex];
parallelSolverCalculateForces[threadIndex].m_constraintArray =
constraintArray;
parallelSolverCalculateForces[threadIndex].m_locks = m_treadLocks;
parallelSolverCalculateForces[threadIndex].m_force = force;
parallelSolverCalculateForces[threadIndex].m_diagDamp = diagDamp;
parallelSolverCalculateForces[threadIndex].m_invDJMinvJt = invDJMinvJt;
parallelSolverCalculateForces[threadIndex].m_normalForceIndex =
normalForceIndex;
parallelSolverCalculateForces[threadIndex].m_internalForces =
internalForces;
parallelSolverCalculateForces[threadIndex].m_coordenateAccel =
coordenateAccel;
parallelSolverCalculateForces[threadIndex].m_lowerFrictionCoef =
lowerFrictionCoef;
parallelSolverCalculateForces[threadIndex].m_upperFrictionCoef =
upperFrictionCoef;
}
dgInt32 maxPasses = iterations + DG_BASE_ITERATION_COUNT;
for (dgInt32 step = 0; step < LINEAR_SOLVER_SUB_STEPS; step++) {
for (dgInt32 threadIndex = 0; threadIndex < threads; threadIndex++) {
m_world->m_threadsManager.SubmitJob(
&parallelSolverJointAcceleration[threadIndex]);
}
m_world->m_threadsManager.SynchronizationBarrier();
dgFloat32 accNorm = maxAccNorm * dgFloat32(2.0f);
for (dgInt32 passes = 0; (passes < maxPasses) && (accNorm > maxAccNorm);
passes++) {
for (dgInt32 threadIndex = 0; threadIndex < threads; threadIndex++) {
m_world->m_threadsManager.SubmitJob(
&parallelSolverCalculateForces[threadIndex]);
}
m_world->m_threadsManager.SynchronizationBarrier();
accNorm = dgFloat32(0.0f);
for (dgInt32 threadIndex = 0; threadIndex < threads; threadIndex++) {
if (parallelSolverCalculateForces[threadIndex].m_accNorm > accNorm) {
accNorm = parallelSolverCalculateForces[threadIndex].m_accNorm;
}
}
// accNorm = 1.0f;
}
for (dgInt32 threadIndex = 0; threadIndex < threads; threadIndex++) {
m_world->m_threadsManager.SubmitJob(
&parallelSolverUpdateVeloc[threadIndex]);
}
m_world->m_threadsManager.SynchronizationBarrier();
}
acc = 0;
m_world->m_threadsManager.CalculateChunkSizes(m_jointCount, chunkJointSizes);
dgParallelSolverInitFeedbackUpdate *const parallelSolverInitFeedbackUpdate =
m_world->m_dynamicSolver.m_parallelSolverInitFeedbackUpdate;
for (dgInt32 threadIndex = 0; threadIndex < threads; threadIndex++) {
parallelSolverInitFeedbackUpdate[threadIndex].m_locks = m_treadLocks;
parallelSolverInitFeedbackUpdate[threadIndex].m_world = m_world;
parallelSolverInitFeedbackUpdate[threadIndex].m_hasJointFeeback = 0;
parallelSolverInitFeedbackUpdate[threadIndex].m_threadIndex = threadIndex;
parallelSolverInitFeedbackUpdate[threadIndex].m_force = force;
parallelSolverInitFeedbackUpdate[threadIndex].m_timestep = timeStep;
parallelSolverInitFeedbackUpdate[threadIndex].m_jointForceFeeback =
jointForceFeeback;
parallelSolverInitFeedbackUpdate[threadIndex].m_count =
chunkJointSizes[threadIndex];
parallelSolverInitFeedbackUpdate[threadIndex].m_constraintArray =
&constraintArray[acc];
m_world->m_threadsManager.SubmitJob(
&parallelSolverInitFeedbackUpdate[threadIndex]);
acc += chunkJointSizes[threadIndex];
}
m_world->m_threadsManager.SynchronizationBarrier();
acc = 0;
dgFloat32 maxAccNorm2 = maxAccNorm * maxAccNorm;
dgParallelSolverUpdateForce *const parallelSolverUpdateForce =
m_world->m_dynamicSolver.m_parallelSolverUpdateForce;
for (dgInt32 threadIndex = 0; threadIndex < threads; threadIndex++) {
parallelSolverUpdateForce[threadIndex].m_useSimd = archModel;
parallelSolverUpdateForce[threadIndex].m_world = m_world;
parallelSolverUpdateForce[threadIndex].m_threadIndex = threadIndex;
parallelSolverUpdateForce[threadIndex].m_invStep = invStep;
parallelSolverUpdateForce[threadIndex].m_invTimeStep = m_invTimeStep;
parallelSolverUpdateForce[threadIndex].m_maxAccNorm2 = maxAccNorm2;
parallelSolverUpdateForce[threadIndex].m_count =
chunkBodiesSizes[threadIndex];
parallelSolverUpdateForce[threadIndex].m_bodyArray = &bodyArray[acc + 1];
parallelSolverUpdateForce[threadIndex].m_internalVeloc = &internalVeloc[acc + 1];
acc += chunkBodiesSizes[threadIndex];
m_world->m_threadsManager.SubmitJob(
&parallelSolverUpdateForce[threadIndex]);
}
m_world->m_threadsManager.SynchronizationBarrier();
dgInt32 hasJointFeeback = 0;
for (dgInt32 threadIndex = 0; threadIndex < threads; threadIndex++) {
hasJointFeeback |=
parallelSolverInitFeedbackUpdate[threadIndex].m_hasJointFeeback;
}
if (hasJointFeeback) {
for (dgInt32 i = 0; i < m_jointCount; i++) {
if (constraintArray[i].m_joint->m_updaFeedbackCallback) {
constraintArray[i].m_joint->m_updaFeedbackCallback(
reinterpret_cast<const NewtonJoint *>(constraintArray[i].m_joint), m_timeStep, m_threadIndex);
}
}
}
}
void dgJacobianMemory::CalculateReactionsForcesParallel(dgInt32 solverMode,
dgFloat32 tolerance, dgInt32 archModel) const {
// NEWTON_ASSERT (m_jointCount >= DG_PARALLEL_JOINT_COUNT);
if (solverMode) {
CalculateForcesGameModeParallel(solverMode, tolerance, archModel);
} else if (archModel) {
CalculateForcesSimulationModeSimd(tolerance);
} else {
CalculateForcesSimulationMode(tolerance);
}
}