performance tuning – How to resolve these errors that pop up when using ‘Compile’ for a large function?

The problem

I have been wanting to make my code which solves an optimisation problem faster. This led to this question on here and I realised from there that the root of the problem is the very slow FindMinimize. Then, using Compile and Parallelization->True together seemed like the best and one of the last options remaining.

Now at this point, I must say that I am a total noob in this area. I don’t know how (maybe because I had already executed the old code without Compile before and variables were stored in memory), but I managed to get my compiled function to run in parallel (with some help of Evaluate to avoid errors which came up otherwise) and execute without any errors and the runtime was almost 100,000 times faster. But the joy was shortlasting because restarting the kernel and running the code again threw many errors and I suspect that using Evaluate might have been the problem. If this is all getting too abstract, below is my exact code (without Evaluate since I can’t make it help right now anyway) and I have mentioned errors I got as well.

So, how do I get Compile to work here?

My code

The first part of the code is simply data given so that you can run the code. You may as well ignore the entire Part 1 of code since it’s just constants and variables to make the code complete. If you hopefully manage to run a modified code without errors, you may use ic={0,0} as an input to the function.

(*PART I*)

inputs1 = Table(Unique("u"), {2});
ga={{0.0071, -0.0564, 0.00580, 0.0104, 0.00240, -0.0321}};
de={{0.0026}};
Sxp={{1,0},{0,1},{0.,1.},{-5684.89,-7.53982},{-5684.89,-7.53982},{42863.1,-5628.04},{42863.1,-5628.04},{-55.8489,7.46575},{-55.8489,7.46575},{-7.46575,0.980098},{-7.46575,0.980098},{0.0197259,-0.00261316},{0.0197259,-0.00261316},{0.0012871,-0.000168938}};
Sxu={{0,0},{0,0},{0,0},{100,0},{100.,0},{-753.982,100},{-753.982,100.},{0.98241,-0.132629},{0.98241,-0.132629},{0.131326,-0.0174146},{0.131326,-0.0174146},{-0.000346987,0.0000464269},{-0.000346987,0.0000464269},{-0.0000226407,3.00173*10^-6}};
Sxz={{1,0,0,0,0,0,0,0,0,0,0,0,0,0},{0,0,1,0,0,0,0,0,0,0,0,0,0,0},{0,0,0,0,1.,0.,0,0,0,0,0,0,0,0},{0,0,0,0,0,0,1.,0.,0,0,0,0,0,0},{0,0,0,0,0,0,0,0,1.,0.,0,0,0,0},{0,0,0,0,0,0,0,0,0,0,1.,0.,0,0},{0,0,0,0,0,0,0,0,0,0,0,0,1.,0.}};
Suz={{0,0},{0,0},{0,0},{0,0},{0,0},{0,0},{0,0}}
zetasumatrix={{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0},{0,0,0,0},{0.125,0,0,0},{0.,0,0,0},{0.,0,0,0},{0.,0,0,0},{0.,0,0,0},{0.,0,0,0},{0.042875,0.125,0,0},{0.25,0.,0,0},{0.,0.,0,0},{0.,0.,0,0},{0.,0.,0,0},{0.,0.,0,0},{-0.290294,0.042875,0.125,0},{0.08575,0.25,0.,0},{0.25,0.,0.,0},{0.,0.,0.,0},{0.,0.,0.,0},{0.,0.,0.,0},{-0.134686,-0.290294,0.042875,0.125},{-0.580588,0.08575,0.25,0.},{0.08575,0.25,0.,0.},{0.25,0.,0.,0.},{0.,0.,0.,0.},{0.,0.,0.,0.}};
zminus2M = ConstantArray(0, {4, 1});
M=2;
hor=2;
n=2;
m=1;
o=1;
x=6;
xs={0.6,0};
us=0.6;
xshat={0.6,0};
theta=xshat((1))/0.577; 
P={{6.78614, -0.962986, 4.91036, -0.839262, 
  1.84927, -0.218886}, {-0.962986, 1.8242, -1.07876, 
  2.54396, -0.536404, 1.83934}, {4.91036, -1.07876, 3.70497, -1.16611,
   1.4347, -0.571865}, {-0.839262, 2.54396, -1.16611, 
  3.64699, -0.633915, 2.69184}, {1.84927, -0.536404, 
  1.4347, -0.633915, 0.56924, -0.362581}, {-0.218886, 
  1.83934, -0.571865, 2.69184, -0.362581, 2.02307}};
windowfunction = Array(HammingWindow, 2*M + 1, {-1/2, 1/2});
X={{3.03185*10^-6, 1.29453*10^-6, 2.80629*10^-7}, {1.29453*10^-6, 
  7.31323*10^-7, 1.01069*10^-7}, {2.80629*10^-7, 1.01069*10^-7, 
  46.2439}};
Q = {{0, 0}, {0, 0}};
R = {{1}};
Qf={{3231.8, 4.28555}, {4.28555, 3231.8}};
alpha=0.2;
Ta=IdentityMatrix(2);
statesxu = Sxu.inputs1;
outputsuz = Suz.inputs1;

(*PART II*)

optimalinputs1 = 
  Compile({{ic, _Real}},
   
    Block({zetas1 = {}, zetas, zetap1, zetap, states1, states, 
      outputs, outputs0, obj, speccons, setcons, doms, constraints, 
      shiftedstates, shiftedinputs},
     
     
     states1 = Sxp.ic + statesxu;
     states = 
      Table(states1((i ;; i + n - 1)), {i, 1, (hor + 2*M)*n + 1, n});
     
     
     outputs0 = Sxz.states1 + outputsuz;
     outputs3 = 
      Table(outputs0((i ;; i + o - 1)), {i, 1, (hor + 2*M)*o+1,o});
     outputs = 
      ArrayFlatten({{zminus2M}, {outputs3}});
     
     zetas1 = 
      Flatten(Table(  
        zetasumatrix.ArrayFlatten(
          windowfunction((1 ;; 2*M))*outputs((p + M + 1 ;; p+3*M)), 
          1), {p, -M, hor + M}));
     zetas = 
      Table(zetas1((i ;; i + x - 1)), {i, 
        1, (2*M + 1)*(hor + 2*M + 1)*x - x + 1, x});
     
     
     shiftedstates = Table(states((i)) - xs, {i, hor});
     shiftedinputs = inputs1 - us; 
     
    obj = Sum(shiftedstates((i)).Q.shiftedstates((i)), {i, hor}) + 
   Sum((shiftedinputs((i))*R*shiftedinputs((i)))((1))((1)), {i, 
     hor}) + (states((hor + 1)) - xs).Qf.(states((hor + 1)) - 
      xs) + (xs - xshat).Ta.(xs - xshat);
     
     speccons = 
      Block({itr, outputsp, zetaoutputsp, zetaoutputsp1, cons, 
        speccon = True},
       Do(
         itr = p + M + 1;
        
        zetap = zetas(((itr - 1)*2*M + itr ;; itr*2*M + itr));
        outputsp = outputs((p + M + 1 ;; p + 3*M + 1));
        
        zetaoutputsp = Transpose({zetap, outputsp});
        zetaoutputsp1 = 
         Table(ArrayFlatten(zetaoutputsp((i)), 1), {i, 2*M + 1});
        
        cons = Sum(
        zetaoutputsp1((k)).(Transpose(
            ArrayFlatten({{ga, 
               windowfunction((k))*de}})).ArrayFlatten({{ga, 
              windowfunction((k))*de}})).zetaoutputsp1((k)), {k, 
         2*M + 1}) + Last(zetap).P.Last(zetap) <= alpha;
   speccon = speccon && cons, {p, -M, hor + M}); speccon);
     
     doms = Block({region = True},
       Do(
        region = region && inputs1((j)) (Element) Reals, {j, 
         hor}); 
       region);
     
     setcons = Block({con = True},
       Do(
        con = con && {-15, -100} (VectorLessEqual) 
           states((i)) (VectorLessEqual) {15, 100}, {i, hor}); 
       Do(con = con && -10 <= inputs1((j)) <= 10, {j, hor}); 
       con); 
     
     constraints = 
      setcons && doms && 
       speccons && (Join(states((hor + 1)) - xs, {theta})).(Inverse@X).(Join(states((hor + 1)) - xs, {theta})) <= 1;
     
     optvec = 
      inputs1 /. 
       Last(FindMinimum({obj, constraints}, inputs1, 
         MaxIterations -> 10000, Method -> "InteriorPoint"));
     optvec), RuntimeAttributes -> {Listable}, 
   Parallelization -> True);

The errors on running above code

Compile::part: Part specification outputs((p+2+1;;p+3 2)) cannot be compiled since the argument is not a tensor of sufficient rank. Evaluation will use the uncompiled function.
Compile::part: Part specification outputs((p+2+1;;p+3 2)) cannot be compiled since the argument is not a tensor of sufficient rank. Evaluation will use the uncompiled function.
Compile::part: Part specification outputs((p+M+1;;p+3 M+1)) cannot be compiled since the argument is not a tensor of sufficient rank. Evaluation will use the uncompiled function.
General::stop: Further output of Compile::part will be suppressed during this calculation.
Compile::cset: Variable region of type True|False encountered in assignment of type _Real.
Compile::cif: The types of the two results in If(con,{-15,-100}(VectorLessEqual)states((i))(VectorLessEqual){15,100},False) are incompatible because their ranks are different. Evaluation will use the uncompiled function.
Compile::cset: Variable con of type True|False encountered in assignment of type _Real.

I am grateful for your time and efforts to go through this longish piece of code.