public Action decide(State state) {
    // Choose randomly between equally good options
    float value = maximize ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY;
    List<Action> bestActions = new ArrayList<Action>();
    // Iterate!
    int flag = maximize ? 1 : -1;
    for (Action action : state.getActions()) {
        try {
            // Algorithm!
            State newState = action.applyTo(state);
            float newValue = this.miniMaxRecursor(newState, 1, !this.maximize);
            // Better candidates?
            if (flag * newValue > flag * value) {
                value = newValue;
                bestActions.clear();
            }
            // Add it to the list of candidates?
            if (flag * newValue >= flag * value)
                bestActions.add(action);
        } catch (InvalidActionException e) {
            throw new RuntimeException("Invalid action!");
        }
    }
    // If there is more than one best action, pick one of the best at random
    Collections.shuffle(bestActions);
    return bestActions.get(0);
}
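For context, here is a sketch of how decide might be driven from a game loop. The MiniMaxDecider class name and its constructor arguments are hypothetical; only State, Action, Status, and decide() come from the code above:

// Hypothetical driver: alternate moves until the game reaches a terminal state.
// MiniMaxDecider(maximize, depth) is an assumed constructor, not shown in the post.
void playGame(State state) throws InvalidActionException {
    MiniMaxDecider decider = new MiniMaxDecider(true /* maximize */, 6 /* depth */);
    while (state.getStatus() == Status.Ongoing) {
        Action chosen = decider.decide(state);
        state = chosen.applyTo(state);
    }
}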
public float miniMaxRecursor(State state, int depth, boolean maximize) {
    // Has this state already been computed?
    if (computedStates.containsKey(state))
        // Return the stored result
        return computedStates.get(state);
    // Is this state done?
    if (state.getStatus() != Status.Ongoing)
        // Store and return
        return finalize(state, state.heuristic());
    // Have we reached the end of the line?
    if (depth == this.depth)
        // Return the heuristic value
        return state.heuristic();
First, if we have already computed the current state, we simply return the value stored in the HashMap from the previous computation.
Otherwise, if the game state is terminal, or the search has reached the depth limit we specified (the hyperparameter maxdepth), we stop searching and return the heuristic value.
    float value = maximize ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY;
    int flag = maximize ? 1 : -1;
    List<Action> test = state.getActions();
    for (Action action : test) {
        // Check it. Is it better? If so, keep it.
        try {
            State childState = action.applyTo(state);
            float newValue = this.miniMaxRecursor(childState, depth + 1, !maximize);
            // Record the best value
            if (flag * newValue > flag * value)
                value = newValue;
        } catch (InvalidActionException e) {
            // Should never happen
            throw new RuntimeException("Invalid action!");
        }
    }
    // Store so we don't have to compute it again.
    return finalize(state, value);
}
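Both the terminal case and the final return funnel through a finalize helper that the post never shows. Judging from its call sites and the computedStates lookups at the top of the function, a minimal sketch could look like this; the field declaration and its HashMap type are my assumptions:

// Assumed memoization helper: cache this state's value, then pass it through.
// Requires java.util.Map and java.util.HashMap imports.
private final Map<State, Float> computedStates = new HashMap<State, Float>();

private float finalize(State state, float value) {
    computedStates.put(state, value);
    return value;
}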
public float miniMaxRecursor(State state, int depth, boolean maximize,
        float alpha, float beta) {
    // Has this state already been computed?
    if (computedStates.containsKey(state))
        // Return the stored result
        return computedStates.get(state);
    // Is this state done?
    if (state.getStatus() != Status.Ongoing)
        // Store and return
        return finalize(state, state.heuristic());
    // Have we reached the end of the line?
    if (depth == this.depth)
        // Return the heuristic value
        return state.heuristic();
    // If not, recurse further. Identify the best actions to take.
    float value = maximize ? Float.NEGATIVE_INFINITY : Float.POSITIVE_INFINITY;
    int flag = maximize ? 1 : -1;
    List<Action> test = state.getActions();
    for (Action action : test) {
        // Check it. Is it better? If so, keep it.
        try {
            State childState = action.applyTo(state);
            float newValue = this.miniMaxRecursor(childState, depth + 1, !maximize, alpha, beta);
            // Record the best value
            if (flag * newValue > flag * value) {
                value = newValue;
                // The pruning happens here: tighten the window, and cut off
                // as soon as the value falls outside it
                if (maximize) {
                    if (value > alpha) {
                        if (value > beta)
                            return finalize(state, value);
                        alpha = value;
                    }
                } else {
                    if (value < beta) {
                        if (value < alpha)
                            return finalize(state, value);
                        beta = value;
                    }
                }
            }
        } catch (InvalidActionException e) {
            // Should never happen
            throw new RuntimeException("Invalid action!");
        }
    }
    // Store so we don't have to compute it again.
    return finalize(state, value);
}
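With the two extra window parameters, the top-level call in decide would presumably seed alpha and beta with the widest possible window. The post doesn't show that call site, so this is a sketch of the one-line change to the earlier decide method:

// Assumed call site in decide(): start with an unbounded (-inf, +inf) window.
float newValue = this.miniMaxRecursor(newState, 1, !this.maximize,
        Float.NEGATIVE_INFINITY, Float.POSITIVE_INFINITY);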
int d;
// Iterative deepening: run the search once at every depth up to the limit
for (d = 1; d < maxdepth; d++) {
    int alpha = LOSE;
    int beta = WIN;
    int actionsExplored = 0;
    // Enumerate all actions. Each ActionValuePair holds an action together with
    // the best value found for it so far, which drives the move ordering below.
    for (ActionValuePair a : actions) {
        State n;
        try {
            n = a.action.applyTo(root);
            int value;
            if (USE_MTDF)
                // The key MTD(f) call: pass the resulting state, the current
                // best guess for this action, and the current search depth
                value = MTDF(n, (int) a.value, d);
            else {
                int flag = maximizer ? 1 : -1;
                value = -AlphaBetaWithMemory(n, -beta, -alpha, d - 1, -flag);
            }
            actionsExplored++;
            // Store the computed value for move ordering
            a.value = value;
        } catch (InvalidActionException e) {
            e.printStackTrace();
        } catch (OutOfTimeException e) {
            System.out.println("Out of time");
            // Revert to the previously computed values.
            // HOWEVER, if our best value is found to be catastrophic, keep its value.
            // TODO: this should keep all found catastrophic values, not just the first!

            // Once at least two actions have been explored, we can compare them.
            boolean resetBest = true;
            if (actionsExplored > 1) {
                ActionValuePair bestAction = actions.get(0);
                // Check whether the first choice is now worse than another action
                for (int i = 0; i < actionsExplored; i++) {
                    if (bestAction.value < actions.get(i).value) {
                        // Don't reset the first choice
                        resetBest = false;
                        break;
                    }
                }
            }
            // If the first choice turned out worse than another action, keep its
            // newly computed (catastrophic) value and revert only the others;
            // otherwise revert every action's value to its previous one.
            if (resetBest) {
                for (ActionValuePair ac : actions) {
                    ac.value = ac.previousValue;
                }
            } else {
                for (int i = 1; i < actionsExplored; i++) {
                    actions.get(i).value = actions.get(i).previousValue;
                }
            }
            break;
        }
    }
    // Sort now so the next, deeper iteration explores the best moves first
    Collections.sort(actions, Collections.reverseOrder());
    // Update previousValue for the next round
    for (ActionValuePair a : actions) {
        a.previousValue = a.value;
    }
}
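Both branches of the loop lean on helpers the post doesn't show: AlphaBetaWithMemory is presumably the memoized alpha-beta recursor in negamax form, and MTDF is the MTD(f) driver itself. For reference, here is a sketch of the canonical MTD(f) loop from Plaat et al. (1996); the signature, the use of LOSE/WIN as infinite bounds, and the fixed-perspective alphaBeta() helper it calls are all assumptions, since the post's exact conventions aren't visible:

// Sketch of the canonical MTD(f) driver. firstGuess is the value carried over
// from the previous, shallower iteration (a.value in the loop above).
// alphaBeta() is a hypothetical stand-in for a transposition-table-backed
// alpha-beta search evaluated from a fixed player's perspective.
int MTDF(State root, int firstGuess, int depth)
        throws InvalidActionException, OutOfTimeException {
    int g = firstGuess;
    int lowerBound = LOSE; // acts as -infinity
    int upperBound = WIN;  // acts as +infinity
    while (lowerBound < upperBound) {
        // Zero-width window placed just around the current guess
        int beta = Math.max(g, lowerBound + 1);
        g = alphaBeta(root, beta - 1, beta, depth);
        if (g < beta)
            upperBound = g; // probe failed low: g is a new upper bound
        else
            lowerBound = g; // probe failed high: g is a new lower bound
    }
    return g;
}

Every zero-window probe either fails low or fails high, so the [lowerBound, upperBound] interval shrinks until it pins down the minimax value; seeding g with the previous iteration's value is what makes iterative deepening and MTD(f) such a good fit.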