1use anyhow::{anyhow, Result};
2
3use crate::config::RewardConfig;
4use crate::events::Event;
5use crate::legal::{ActionDesc, DecisionKind};
6use crate::state::{Phase, StageStatus, TerminalResult, TimingWindow};
7
8use super::super::{GameEnv, MAX_CHOICE_OPTIONS};
9use crate::env::core::ProgressSignature;
10
11impl GameEnv {
12 pub fn apply_action_id(&mut self, action_id: usize) -> Result<super::super::StepOutcome> {
16 self.apply_action_id_internal(action_id, true)
17 }
18
19 pub fn apply_action_id_no_copy(
23 &mut self,
24 action_id: usize,
25 ) -> Result<super::super::StepOutcome> {
26 self.apply_action_id_internal(action_id, false)
27 }
28
29 fn apply_action_id_internal(
30 &mut self,
31 action_id: usize,
32 copy_obs: bool,
33 ) -> Result<super::super::StepOutcome> {
34 if self.is_fault_latched() {
35 return Ok(self.build_fault_step_outcome(copy_obs));
36 }
37 self.last_illegal_action = false;
38 self.last_engine_error = false;
39 self.last_engine_error_code = super::super::EngineErrorCode::None;
40 let Some(decision) = self.decision.as_ref() else {
41 return Err(anyhow!("No pending decision"));
42 };
43 self.last_perspective = decision.player;
44 let action = match self.action_for_id(action_id) {
45 Some(action) => action,
46 None => {
47 return self.handle_illegal_action(decision.player, "Invalid action id", copy_obs);
48 }
49 };
50 self.apply_action_internal(action, copy_obs)
51 }
52
53 pub fn apply_action(&mut self, action: ActionDesc) -> Result<super::super::StepOutcome> {
55 self.apply_action_internal(action, true)
56 }
57
58 fn apply_action_internal(
59 &mut self,
60 action: ActionDesc,
61 copy_obs: bool,
62 ) -> Result<super::super::StepOutcome> {
63 let acting_player = self
64 .decision
65 .as_ref()
66 .map(|d| d.player)
67 .unwrap_or(self.last_perspective);
68 self.last_perspective = acting_player;
69 self.pending_damage_delta = [0, 0];
70 let decision_kind = self
71 .decision
72 .as_ref()
73 .map(|d| d.kind)
74 .ok_or_else(|| anyhow!("No decision to apply"))?;
75 self.last_action_decision_kind = Some(decision_kind);
76 let action_clone = action.clone();
77 if self.should_validate_state() {
78 if let Some(decision) = &self.decision {
79 let legal = super::legal_actions_cached(
80 &self.state,
81 decision,
82 &self.db,
83 &self.curriculum,
84 self.curriculum.allowed_card_sets_cache.as_ref(),
85 );
86 if !legal.contains(&action_clone) {
87 return self.handle_illegal_action(
88 decision.player,
89 "Action not in legal set",
90 copy_obs,
91 );
92 }
93 }
94 }
95 let outcome = match self.apply_action_impl(action, copy_obs) {
96 Ok(outcome) => Ok(outcome),
97 Err(err) => match self.config.error_policy {
98 crate::config::ErrorPolicy::Strict => Err(err),
99 crate::config::ErrorPolicy::LenientTerminate => {
100 self.last_engine_error = true;
101 self.last_engine_error_code = super::super::EngineErrorCode::ActionError;
102 self.last_perspective = acting_player;
103 self.state.terminal = Some(TerminalResult::Win {
104 winner: 1 - acting_player,
105 });
106 self.decision = None;
107 self.update_action_cache();
108 Ok(self
109 .build_outcome_with_obs(self.terminal_reward_for(acting_player), copy_obs))
110 }
111 crate::config::ErrorPolicy::LenientNoop => {
112 self.last_engine_error = true;
113 self.last_engine_error_code = super::super::EngineErrorCode::ActionError;
114 self.last_perspective = acting_player;
115 self.update_action_cache();
116 Ok(self.build_outcome_with_obs(0.0, copy_obs))
117 }
118 },
119 }?;
120 if self.recording || self.should_validate_state() {
121 let main_move_action = matches!(decision_kind, DecisionKind::Main)
122 && matches!(action_clone, ActionDesc::MainMove { .. });
123 let main_pass_action = matches!(decision_kind, DecisionKind::Main)
124 && matches!(action_clone, ActionDesc::Pass);
125 self.log_action(acting_player, action_clone);
126 self.replay_steps.push(crate::replay::StepMeta {
127 actor: acting_player,
128 decision_kind,
129 illegal_action: self.last_illegal_action,
130 engine_error: self.last_engine_error,
131 main_move_action,
132 main_pass_action,
133 });
134 }
135 Ok(outcome)
136 }
137
138 fn apply_action_impl(
139 &mut self,
140 action: ActionDesc,
141 copy_obs: bool,
142 ) -> Result<super::super::StepOutcome> {
143 let decision = self
144 .decision
145 .clone()
146 .ok_or_else(|| anyhow!("No decision to apply"))?;
147 self.last_perspective = decision.player;
148 self.last_action_desc = Some(action.clone());
149 self.last_action_player = Some(decision.player);
150 let progress_before = self.progress_signature();
151
152 let mut reward = 0.0f32;
153
154 if action == ActionDesc::Concede {
155 self.log_event(Event::Concede {
156 player: decision.player,
157 });
158 self.state.terminal = Some(TerminalResult::Win {
159 winner: 1 - decision.player,
160 });
161 self.decision = None;
162 self.state.turn.decision_count += 1;
163 self.update_action_cache();
164 if self.maybe_validate_state("post_concede") || self.is_fault_latched() {
165 return Ok(self.build_fault_step_outcome(copy_obs));
166 }
167 reward += self.compute_reward(
168 decision.player,
169 &self.pending_damage_delta,
170 &progress_before,
171 );
172 return Ok(self.build_outcome_with_obs(reward, copy_obs));
173 }
174
175 match decision.kind {
176 DecisionKind::Mulligan => match action {
177 ActionDesc::MulliganSelect { hand_index } => {
178 let p = decision.player as usize;
179 let hi = hand_index as usize;
180 if hi >= self.state.players[p].hand.len() {
181 return self.handle_illegal_action(
182 decision.player,
183 "Mulligan hand index out of range",
184 copy_obs,
185 );
186 }
187 if hi >= crate::encode::MAX_HAND {
188 return self.handle_illegal_action(
189 decision.player,
190 "Mulligan hand index exceeds encoding",
191 copy_obs,
192 );
193 }
194 let bit = 1u64 << hi;
195 let current = &mut self.state.turn.mulligan_selected[p];
196 if *current & bit != 0 {
197 *current &= !bit;
198 } else {
199 *current |= bit;
200 }
201 }
202 ActionDesc::MulliganConfirm => {
203 let p = decision.player as usize;
204 let hand_len = self.state.players[p].hand.len();
205 let mut indices: Vec<usize> = Vec::new();
206 let mask = self.state.turn.mulligan_selected[p];
207 for idx in 0..hand_len.min(crate::encode::MAX_HAND) {
208 if mask & (1u64 << idx) != 0 {
209 indices.push(idx);
210 }
211 }
212 indices.sort_by(|a, b| b.cmp(a));
213 for idx in indices.iter().copied() {
214 if idx >= self.state.players[p].hand.len() {
215 continue;
216 }
217 let card = self.state.players[p].hand.remove(idx);
218 let from_slot = if idx <= u8::MAX as usize {
219 Some(idx as u8)
220 } else {
221 None
222 };
223 self.move_card_between_zones(
224 p as u8,
225 card,
226 crate::events::Zone::Hand,
227 crate::events::Zone::WaitingRoom,
228 from_slot,
229 None,
230 );
231 }
232 let draw_count = indices.len();
233 if draw_count > 0 {
234 self.draw_to_hand(p as u8, draw_count);
235 }
236 self.state.turn.mulligan_done[p] = true;
237 self.state.turn.mulligan_selected[p] = 0;
238 }
239 _ => {
240 return self.handle_illegal_action(
241 decision.player,
242 "Invalid mulligan action",
243 copy_obs,
244 )
245 }
246 },
247 DecisionKind::Clock => {
248 match action {
249 ActionDesc::Pass => {
250 self.log_event(Event::Clock {
251 player: decision.player,
252 card: None,
253 });
254 }
255 ActionDesc::Clock { hand_index } => {
256 let p = decision.player as usize;
257 let hi = hand_index as usize;
258 if hi >= self.state.players[p].hand.len() {
259 return self.handle_illegal_action(
260 decision.player,
261 "Clock hand index out of range",
262 copy_obs,
263 );
264 }
265 let card = self.state.players[p].hand.remove(hi);
266 let card_id = card.id;
267 self.move_card_between_zones(
268 decision.player,
269 card,
270 crate::events::Zone::Hand,
271 crate::events::Zone::Clock,
272 Some(hand_index),
273 None,
274 );
275 self.log_event(Event::Clock {
276 player: decision.player,
277 card: Some(card_id),
278 });
279 self.draw_to_hand(decision.player, 2);
280 self.check_level_up(decision.player);
281 }
282 _ => {
283 return self.handle_illegal_action(
284 decision.player,
285 "Invalid clock action",
286 copy_obs,
287 )
288 }
289 }
290 self.state.turn.phase_step = 2;
291 }
292 DecisionKind::Main => match action {
293 ActionDesc::Pass => {
294 if self.curriculum.enable_priority_windows {
295 self.state.turn.main_passed = true;
296 if self.state.turn.priority.is_none() {
297 self.enter_timing_window(TimingWindow::MainWindow, decision.player);
298 }
299 } else {
300 self.state.turn.main_passed = false;
301 self.state.turn.phase = Phase::Climax;
302 self.state.turn.phase_step = 0;
303 }
304 }
305 ActionDesc::MainPlayCharacter {
306 hand_index,
307 stage_slot,
308 } => {
309 if let Err(err) = self.play_character(decision.player, hand_index, stage_slot) {
310 return self.handle_illegal_action(
311 decision.player,
312 &err.to_string(),
313 copy_obs,
314 );
315 }
316 }
317 ActionDesc::MainPlayEvent { hand_index } => {
318 if let Err(err) = self.play_event(decision.player, hand_index) {
319 return self.handle_illegal_action(
320 decision.player,
321 &err.to_string(),
322 copy_obs,
323 );
324 }
325 }
326 ActionDesc::MainMove { from_slot, to_slot } => {
327 let p = decision.player as usize;
328 let fs = from_slot as usize;
329 let ts = to_slot as usize;
330 if fs >= self.state.players[p].stage.len()
331 || ts >= self.state.players[p].stage.len()
332 || fs == ts
333 {
334 return self.handle_illegal_action(
335 decision.player,
336 "Invalid move slots",
337 copy_obs,
338 );
339 }
340 if self.state.players[p].stage[fs].card.is_none() {
341 return self.handle_illegal_action(
342 decision.player,
343 "Move requires a source slot with a card",
344 copy_obs,
345 );
346 }
347 if self.slot_has_active_modifier_kind(
348 decision.player,
349 from_slot,
350 crate::state::ModifierKind::CannotMoveStagePosition,
351 ) {
352 return self.handle_illegal_action(
353 decision.player,
354 "Source slot card cannot move",
355 copy_obs,
356 );
357 }
358 if self.state.players[p].stage[ts].card.is_some()
359 && self.slot_has_active_modifier_kind(
360 decision.player,
361 to_slot,
362 crate::state::ModifierKind::CannotMoveStagePosition,
363 )
364 {
365 return self.handle_illegal_action(
366 decision.player,
367 "Destination slot card cannot move",
368 copy_obs,
369 );
370 }
371 if self.state.turn.main_move_used {
372 return self.handle_illegal_action(
373 decision.player,
374 "Main move already used this turn",
375 copy_obs,
376 );
377 }
378 self.state.players[p].stage.swap(fs, ts);
379 self.state.turn.main_move_used = true;
380 self.remove_modifiers_for_slot(decision.player, from_slot);
381 self.remove_modifiers_for_slot(decision.player, to_slot);
382 self.mark_slot_power_dirty(decision.player, from_slot);
383 self.mark_slot_power_dirty(decision.player, to_slot);
384 self.mark_rule_actions_dirty();
385 self.mark_continuous_modifiers_dirty();
386 }
387 ActionDesc::MainActivateAbility {
388 slot,
389 ability_index,
390 } => {
391 let _ = (slot, ability_index);
392 return self.handle_illegal_action(
393 decision.player,
394 "Activated abilities only via priority window",
395 copy_obs,
396 );
397 }
398 _ => {
399 return self.handle_illegal_action(
400 decision.player,
401 "Invalid main action",
402 copy_obs,
403 )
404 }
405 },
406 DecisionKind::Climax => match action {
407 ActionDesc::Pass => {
408 self.state.turn.phase_step = 2;
409 if self.curriculum.enable_priority_windows {
410 self.enter_timing_window(TimingWindow::ClimaxWindow, decision.player);
411 }
412 }
413 ActionDesc::ClimaxPlay { hand_index } => {
414 if let Err(err) = self.play_climax(decision.player, hand_index) {
415 return self.handle_illegal_action(
416 decision.player,
417 &err.to_string(),
418 copy_obs,
419 );
420 }
421 self.state.turn.phase_step = 2;
422 if self.curriculum.enable_priority_windows {
423 self.enter_timing_window(TimingWindow::ClimaxWindow, decision.player);
424 }
425 }
426 _ => {
427 return self.handle_illegal_action(
428 decision.player,
429 "Invalid climax action",
430 copy_obs,
431 )
432 }
433 },
434 DecisionKind::AttackDeclaration => match action {
435 ActionDesc::Pass => {
436 if self.curriculum.enable_encore {
437 self.queue_encore_requests();
438 } else {
439 self.cleanup_reversed_to_waiting_room();
440 }
441 self.state.turn.phase = Phase::End;
442 self.state.turn.phase_step = 0;
443 self.state.turn.attack_phase_begin_done = false;
444 self.state.turn.attack_decl_check_done = false;
445 }
446 ActionDesc::Attack { slot, attack_type } => {
447 if let Err(err) = self.declare_attack(decision.player, slot, attack_type) {
448 return self.handle_illegal_action(
449 decision.player,
450 &err.to_string(),
451 copy_obs,
452 );
453 }
454 }
455 _ => {
456 return self.handle_illegal_action(
457 decision.player,
458 "Invalid attack action",
459 copy_obs,
460 )
461 }
462 },
463 DecisionKind::LevelUp => match action {
464 ActionDesc::LevelUp { index } => {
465 if self.state.turn.pending_level_up != Some(decision.player) {
466 return self.handle_illegal_action(
467 decision.player,
468 "No pending level up",
469 copy_obs,
470 );
471 }
472 if let Err(err) = self.resolve_level_up(decision.player, index) {
473 return self.handle_illegal_action(
474 decision.player,
475 &err.to_string(),
476 copy_obs,
477 );
478 }
479 }
480 _ => {
481 return self.handle_illegal_action(
482 decision.player,
483 "Invalid level up action",
484 copy_obs,
485 )
486 }
487 },
488 DecisionKind::Encore => match action {
489 ActionDesc::EncorePay { slot } => {
490 if let Err(err) = self.resolve_encore(decision.player, slot, true) {
491 return self.handle_illegal_action(
492 decision.player,
493 &err.to_string(),
494 copy_obs,
495 );
496 }
497 }
498 ActionDesc::EncoreDecline { slot } => {
499 if let Err(err) = self.resolve_encore(decision.player, slot, false) {
500 return self.handle_illegal_action(
501 decision.player,
502 &err.to_string(),
503 copy_obs,
504 );
505 }
506 }
507 _ => {
508 return self.handle_illegal_action(
509 decision.player,
510 "Invalid encore action",
511 copy_obs,
512 )
513 }
514 },
515 DecisionKind::TriggerOrder => {
516 let Some(order) = self.state.turn.trigger_order.clone() else {
517 return self.handle_illegal_action(
518 decision.player,
519 "No trigger order pending",
520 copy_obs,
521 );
522 };
523 if order.player != decision.player {
524 return self.handle_illegal_action(
525 decision.player,
526 "Trigger order player mismatch",
527 copy_obs,
528 );
529 }
530 match action {
531 ActionDesc::TriggerOrder { index } => {
532 let idx = index as usize;
533 if idx >= order.choices.len() {
534 return self.handle_illegal_action(
535 decision.player,
536 "Trigger order index out of range",
537 copy_obs,
538 );
539 }
540 let trigger_id = order.choices[idx];
541 let trigger_index = self
542 .state
543 .turn
544 .pending_triggers
545 .iter()
546 .position(|t| t.id == trigger_id);
547 let Some(trigger_index) = trigger_index else {
548 return self.handle_illegal_action(
549 decision.player,
550 "Trigger already resolved",
551 copy_obs,
552 );
553 };
554 let trigger = self.state.turn.pending_triggers.remove(trigger_index);
555 if let Err(err) = self.resolve_trigger(trigger) {
556 let msg = format!("Trigger resolve failed: {err}");
557 return self.handle_illegal_action(decision.player, &msg, copy_obs);
558 }
559 self.state.turn.trigger_order = None;
560 }
561 _ => {
562 return self.handle_illegal_action(
563 decision.player,
564 "Invalid trigger order action",
565 copy_obs,
566 )
567 }
568 }
569 }
570 DecisionKind::Choice => {
571 let Some(choice_ref) = self.state.turn.choice.as_ref() else {
572 return self.handle_illegal_action(
573 decision.player,
574 "No choice pending",
575 copy_obs,
576 );
577 };
578 if choice_ref.player != decision.player {
579 return self.handle_illegal_action(
580 decision.player,
581 "Choice player mismatch",
582 copy_obs,
583 );
584 }
585 match action {
586 ActionDesc::ChoiceSelect { index } => {
587 let Some(choice) = self.state.turn.choice.take() else {
588 return self.handle_illegal_action(
589 decision.player,
590 "No choice pending",
591 copy_obs,
592 );
593 };
594 let idx = index as usize;
595 if idx >= MAX_CHOICE_OPTIONS {
596 return self.handle_illegal_action(
597 decision.player,
598 "Choice index out of range",
599 copy_obs,
600 );
601 }
602 let total = choice.total_candidates as usize;
603 let page_start = choice.page_start as usize;
604 let global_idx = page_start + idx;
605 if global_idx >= total {
606 return self.handle_illegal_action(
607 decision.player,
608 "Choice index out of range",
609 copy_obs,
610 );
611 }
612 let Some(option) = choice.options.get(global_idx).copied() else {
613 return self.handle_illegal_action(
614 decision.player,
615 "Choice option missing",
616 copy_obs,
617 );
618 };
619 if self.recording {
620 self.log_event(Event::ChoiceMade {
621 choice_id: choice.id,
622 player: decision.player,
623 reason: choice.reason,
624 option,
625 });
626 }
627 self.recycle_choice_options(choice.options);
628 self.apply_choice_effect(
629 choice.reason,
630 choice.player,
631 option,
632 choice.pending_trigger,
633 );
634 }
635 ActionDesc::ChoicePrevPage | ActionDesc::ChoiceNextPage => {
636 let nav = {
637 let Some(choice) = self.state.turn.choice.as_mut() else {
638 return self.handle_illegal_action(
639 decision.player,
640 "No choice pending",
641 copy_obs,
642 );
643 };
644 let total = choice.total_candidates as usize;
645 let page_size = MAX_CHOICE_OPTIONS;
646 let current = choice.page_start as usize;
647 let new_start = match action {
648 ActionDesc::ChoicePrevPage => {
649 if current < page_size {
650 None
651 } else {
652 Some(current - page_size)
653 }
654 }
655 ActionDesc::ChoiceNextPage => {
656 if current + page_size >= total {
657 None
658 } else {
659 Some(current + page_size)
660 }
661 }
662 _ => None,
663 };
664 if let Some(new_start) = new_start {
665 let from_start = choice.page_start;
666 choice.page_start = new_start as u16;
667 Some((choice.id, choice.player, from_start, choice.page_start))
668 } else {
669 None
670 }
671 };
672 let Some((choice_id, player, from_start, to_start)) = nav else {
673 return self.handle_illegal_action(
674 decision.player,
675 "Choice page out of range",
676 copy_obs,
677 );
678 };
679 if self.recording {
680 self.log_event(Event::ChoicePageChanged {
681 choice_id,
682 player,
683 from_start,
684 to_start,
685 });
686 }
687 }
688 _ => {
689 return self.handle_illegal_action(
690 decision.player,
691 "Invalid choice action",
692 copy_obs,
693 )
694 }
695 }
696 }
697 }
698
699 self.decision = None;
700 self.state.turn.decision_count += 1;
701 if self.state.turn.decision_count >= self.config.max_decisions {
702 self.state.terminal = Some(TerminalResult::Timeout);
703 }
704
705 self.advance_until_decision();
706 self.update_action_cache();
707 if self.maybe_validate_state("post_action") || self.is_fault_latched() {
708 return Ok(self.build_fault_step_outcome(copy_obs));
709 }
710
711 self.update_no_progress_counter(progress_before);
712 reward += self.compute_reward(
713 decision.player,
714 &self.pending_damage_delta,
715 &progress_before,
716 );
717 Ok(self.build_outcome_with_obs(reward, copy_obs))
718 }
719
720 pub(in crate::env) fn progress_signature(&self) -> ProgressSignature {
721 let mut signature = ProgressSignature {
722 active_player: self.state.turn.active_player,
723 turn_number: self.state.turn.turn_number,
724 phase: self.state.turn.phase,
725 choice_id: self.state.turn.choice.as_ref().map(|choice| choice.id),
726 choice_page_start: self
727 .state
728 .turn
729 .choice
730 .as_ref()
731 .map_or(0, |choice| choice.page_start),
732 choice_total_candidates: self
733 .state
734 .turn
735 .choice
736 .as_ref()
737 .map_or(0, |choice| choice.total_candidates),
738 ..ProgressSignature::default()
739 };
740 for player in 0..2usize {
741 let state = &self.state.players[player];
742 signature.deck_counts[player] = state.deck.len() as u16;
743 signature.hand_counts[player] = state.hand.len() as u16;
744 signature.waiting_room_counts[player] = state.waiting_room.len() as u16;
745 signature.clock_counts[player] = state.clock.len() as u16;
746 signature.level_counts[player] = state.level.len() as u16;
747 signature.stock_counts[player] = state.stock.len() as u16;
748 signature.memory_counts[player] = state.memory.len() as u16;
749 signature.climax_counts[player] = state.climax.len() as u16;
750 signature.resolution_counts[player] = state.resolution.len() as u16;
751 signature.occupied_stage_counts[player] = state
752 .stage
753 .iter()
754 .filter(|slot| slot.card.is_some())
755 .count() as u16;
756 signature.reversed_stage_counts[player] = state
757 .stage
758 .iter()
759 .filter(|slot| slot.card.is_some() && slot.status == StageStatus::Reverse)
760 .count() as u16;
761 signature.live_stage_counts[player] = state
762 .stage
763 .iter()
764 .filter(|slot| slot.card.is_some() && slot.status != StageStatus::Reverse)
765 .count() as u16;
766 }
767 signature
768 }
769
770 pub(crate) fn last_action_main_flags(&self) -> (bool, bool) {
771 match (
772 self.last_action_decision_kind,
773 self.last_action_desc.as_ref(),
774 ) {
775 (Some(DecisionKind::Main), Some(ActionDesc::MainMove { .. })) => (true, false),
776 (Some(DecisionKind::Main), Some(ActionDesc::Pass)) => (false, true),
777 _ => (false, false),
778 }
779 }
780
781 pub(in crate::env) fn update_no_progress_counter(&mut self, before: ProgressSignature) {
782 if self.state.terminal.is_some() {
783 self.no_progress_decisions = 0;
784 return;
785 }
786 let limit = self.curriculum.max_no_progress_decisions;
787 if limit == 0 {
788 self.no_progress_decisions = 0;
789 return;
790 }
791 let after = self.progress_signature();
792 if after != before {
793 self.no_progress_decisions = 0;
794 return;
795 }
796 self.no_progress_decisions = self.no_progress_decisions.saturating_add(1);
797 if self.no_progress_decisions >= limit {
798 self.state.terminal = Some(TerminalResult::Timeout);
799 self.decision = None;
800 self.update_action_cache();
801 }
802 }
803
804 pub(in crate::env) fn compute_reward(
805 &self,
806 perspective: u8,
807 damage_delta: &[i32; 2],
808 progress_before: &ProgressSignature,
809 ) -> f32 {
810 let RewardConfig {
811 terminal_win,
812 terminal_loss,
813 terminal_draw,
814 terminal_timeout,
815 enable_shaping,
816 damage_reward,
817 level_reward,
818 board_reward,
819 no_progress_penalty,
820 } = &self.config.reward;
821 if let Some(term) = self.state.terminal {
822 return match term {
823 TerminalResult::Win { winner } => {
824 if winner == perspective {
825 *terminal_win
826 } else {
827 *terminal_loss
828 }
829 }
830 TerminalResult::Draw => *terminal_draw,
831 TerminalResult::Timeout => *terminal_timeout,
832 };
833 }
834 if *enable_shaping {
835 let mut reward = 0.0;
836 let p = perspective as usize;
837 let opp = 1 - p;
838 reward += *damage_reward * damage_delta[opp] as f32;
839 reward -= *damage_reward * damage_delta[p] as f32;
840 let progress_after = self.progress_signature();
841 let level_delta = (progress_after.level_counts[opp] as i32
842 - progress_before.level_counts[opp] as i32)
843 - (progress_after.level_counts[p] as i32 - progress_before.level_counts[p] as i32);
844 reward += *level_reward * level_delta as f32;
845 let board_delta = (progress_after.live_stage_counts[p] as i32
846 - progress_before.live_stage_counts[p] as i32)
847 - (progress_after.live_stage_counts[opp] as i32
848 - progress_before.live_stage_counts[opp] as i32);
849 reward += *board_reward * board_delta as f32;
850 if progress_after == *progress_before {
851 reward -= *no_progress_penalty;
852 }
853 return reward;
854 }
855 0.0
856 }
857}