1use anyhow::{anyhow, Result};
2use std::collections::BTreeSet;
3use std::sync::Arc;
4
5use crate::config::{
6 CurriculumConfig, EnvConfig, ErrorPolicy, ObservationVisibility, RewardConfig,
7};
8use crate::db::{CardDb, CardId, CardStatic, CardType};
9use crate::encode::{
10 encode_observation_with_slot_power, fill_action_mask, ACTION_ENCODING_VERSION,
11 OBS_ENCODING_VERSION, OBS_LEN,
12};
13use crate::events::{Event, Zone};
14use crate::legal::{ActionDesc, Decision, DecisionKind};
15use crate::replay::{ReplayConfig, ReplayEvent, ReplayWriter, StepMeta};
16use crate::state::{
17 CardInstance, CardInstanceId, ChoiceOptionRef, DamageType, GameState, ModifierDuration,
18 ModifierKind, Phase, TargetRef, TerminalResult, TimingWindow,
19};
20use crate::util::Rng64;
21
/// Status metadata carried by every [`StepOutcome`] in its `info` field.
///
/// NOTE(review): the code that fills this struct is outside this view; the
/// per-field notes below are inferred from names and from the matching
/// `GameEnv` fields and should be confirmed against the builder.
#[derive(Clone, Debug)]
pub struct EnvInfo {
    /// Observation encoding version (presumably `OBS_ENCODING_VERSION` — TODO confirm).
    pub obs_version: u32,
    /// Action encoding version (presumably `ACTION_ENCODING_VERSION` — TODO confirm).
    pub action_version: u32,
    /// Numeric code of the pending decision kind; signed, so a negative
    /// sentinel is presumably used when no decision is pending — TODO confirm.
    pub decision_kind: i8,
    /// Player whose turn it is (signed; sentinel semantics not visible here).
    pub current_player: i8,
    /// Player expected to act next (signed; sentinel semantics not visible here).
    pub actor: i8,
    /// Decision counter for the episode.
    pub decision_count: u32,
    /// Tick counter for the episode.
    pub tick_count: u32,
    /// Terminal result, if the game has ended.
    pub terminal: Option<TerminalResult>,
    /// Whether the last action was flagged illegal.
    pub illegal_action: bool,
    /// Whether the last step hit an engine error.
    pub engine_error: bool,
    /// Numeric engine error code (see `EngineErrorCode` for the values).
    pub engine_error_code: u8,
}
37
/// Result of a reset or a single applied action.
#[derive(Clone, Debug)]
pub struct StepOutcome {
    /// Encoded observation. NOTE(review): the `*_no_copy` entry points pass
    /// `copy_obs = false`, so this is presumably left empty there — confirm
    /// in `build_outcome_with_obs`.
    pub obs: Vec<i32>,
    /// Scalar reward for this step.
    pub reward: f32,
    /// Episode-terminated flag.
    pub terminated: bool,
    /// Episode-truncated flag.
    pub truncated: bool,
    /// Additional status metadata for the step.
    pub info: EnvInfo,
}
47
/// Small copyable bundle describing who is looking at the game and under
/// which visibility rules.
#[derive(Clone, Copy, Debug)]
struct VisibilityContext {
    /// Viewing player, if any (the meaning of `None` is defined by callers
    /// outside this view).
    viewer: Option<u8>,
    /// Configured observation visibility mode.
    mode: ObservationVisibility,
    /// Whether visibility policies are applied at all; when `false`,
    /// `is_public` always reports `false`.
    policies_enabled: bool,
}
54
55impl VisibilityContext {
56 fn is_public(self) -> bool {
57 self.policies_enabled && self.mode == ObservationVisibility::Public
58 }
59}
60
/// Classification of the most recent engine error, with explicit numeric
/// discriminants so the value can be exported as a small integer.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum EngineErrorCode {
    /// No engine error occurred.
    None = 0,
    /// Presumably raised when stack auto-resolution exceeds
    /// `STACK_AUTO_RESOLVE_CAP` — the raising site is not visible here.
    StackAutoResolveCap = 1,
    /// Presumably raised when check-timing processing exceeds
    /// `CHECK_TIMING_QUIESCENCE_CAP` — the raising site is not visible here.
    TriggerQuiescenceCap = 2,
    /// A panic was caught (raising site not visible here).
    Panic = 3,
    /// Applying an action returned an error under a lenient error policy.
    ActionError = 4,
}
69
/// Debug instrumentation settings; the `Default` value has both fields at zero.
#[derive(Clone, Copy, Debug, Default)]
pub struct DebugConfig {
    /// Fingerprinting interval (consumer not visible here; presumably
    /// "every N steps", with 0 disabling it — TODO confirm).
    pub fingerprint_every_n: u32,
    /// Capacity of each per-viewer debug event ring; `0` disables the rings
    /// (see `GameEnv::set_debug_config`).
    pub event_ring_capacity: usize,
}
75
/// A single game environment: card database, configuration, mutable game
/// state, the pending decision with its cached legal actions, replay
/// buffers and debug instrumentation.
///
/// NOTE(review): field comments describe how the fields are used in the
/// visible part of this file; usages in the submodules are not covered.
pub struct GameEnv {
    /// Shared, read-only card database.
    pub db: Arc<CardDb>,
    /// Environment configuration (deck lists, error policy, ...).
    pub config: EnvConfig,
    /// Curriculum switches; its cache is rebuilt in `new`.
    pub curriculum: CurriculumConfig,
    /// Current game state; replaced wholesale on every reset.
    pub state: GameState,
    /// Caller-supplied identifier of this environment.
    pub env_id: u32,
    /// Incremented (wrapping) on every reset.
    pub episode_index: u32,
    /// Decision currently awaiting an action, if any.
    pub decision: Option<Decision>,
    /// Cached action mask / lookup / legal action list for `decision`.
    action_cache: ActionCache,
    /// Incremented (wrapping) each time a decision is set; zeroed on reset.
    decision_id: u32,
    /// Last action applied and the player who applied it.
    pub last_action_desc: Option<ActionDesc>,
    pub last_action_player: Option<u8>,
    /// Status flags describing the most recent step.
    pub last_illegal_action: bool,
    pub last_engine_error: bool,
    pub last_engine_error_code: EngineErrorCode,
    /// Player from whose perspective the last outcome was produced.
    pub last_perspective: u8,
    /// Per-player damage delta accumulated during the current step; zeroed
    /// at the start of each applied action.
    pub pending_damage_delta: [i32; 2],
    /// Reusable observation buffer, kept at `OBS_LEN` elements.
    pub obs_buf: Vec<i32>,
    // Per-player, per-stage-slot power cache and the inputs it was computed
    // from; everything is reset and marked dirty on reset.
    slot_power_cache: [[i32; crate::encode::MAX_STAGE]; 2],
    slot_power_dirty: [[bool; crate::encode::MAX_STAGE]; 2],
    slot_power_cache_card: [[CardId; crate::encode::MAX_STAGE]; 2],
    slot_power_cache_mod_turn: [[i32; crate::encode::MAX_STAGE]; 2],
    slot_power_cache_mod_battle: [[i32; crate::encode::MAX_STAGE]; 2],
    /// Set when rule actions must be re-resolved (see `run_rule_actions_if_needed`).
    rule_actions_dirty: bool,
    /// Set when continuous modifiers must be recomputed (consumer not visible here).
    continuous_modifiers_dirty: bool,
    /// Phase observed the last time rule actions were considered.
    last_rule_action_phase: Phase,
    /// Replay sampling configuration; its cache is rebuilt in `new`.
    pub replay_config: ReplayConfig,
    /// Optional sink for recorded replays.
    pub replay_writer: Option<ReplayWriter>,
    // Per-episode replay buffers, cleared on reset.
    pub replay_actions: Vec<ActionDesc>,
    pub replay_events: Vec<ReplayEvent>,
    canonical_events: Vec<Event>,
    pub replay_steps: Vec<StepMeta>,
    /// Whether the current episode was sampled for recording.
    pub recording: bool,
    /// RNG used for episode seeds and replay sampling.
    pub meta_rng: Rng64,
    /// Seed of the current episode.
    pub episode_seed: u64,
    /// Scratch buffer, cleared on reset (usage not visible here).
    pub scratch_replacement_indices: Vec<usize>,
    /// Reusable scratch vectors.
    scratch: EnvScratch,
    /// Per-viewer set of card instances, cleared on reset (presumably the
    /// instances revealed to that viewer — TODO confirm in `visibility`).
    revealed_to_viewer: [BTreeSet<CardInstanceId>; 2],
    /// Active debug configuration.
    debug: DebugConfig,
    /// One event ring per viewer; `None` while the configured capacity is 0.
    debug_event_ring: Option<[EventRing; 2]>,
}
118
/// Plain-data description of a damage instance.
///
/// NOTE(review): this type is not used in the visible part of the file;
/// field meanings are inferred from names only.
#[derive(Clone, Copy, Debug)]
struct DamageIntentLocal {
    /// Player dealing the damage.
    source_player: u8,
    /// Slot of the source, when there is one.
    source_slot: Option<u8>,
    /// Target of the damage (presumably the receiving player index — TODO confirm).
    target: u8,
    /// Damage amount.
    amount: i32,
    /// Kind of damage.
    damage_type: DamageType,
    /// Whether the damage can be canceled.
    cancelable: bool,
    /// Whether this damage is a refresh penalty.
    refresh_penalty: bool,
}
129
/// Reusable scratch vectors owned by the environment (pre-sized in
/// `EnvScratch::new`); how they are consumed is not visible in this part of
/// the file.
struct EnvScratch {
    /// Scratch list of target references.
    targets: Vec<TargetRef>,
    /// Scratch list of choice options.
    choice_options: Vec<ChoiceOptionRef>,
    /// Scratch list of priority-window actions.
    priority_actions: Vec<ActionDesc>,
}
135
136impl EnvScratch {
137 fn new() -> Self {
138 Self {
139 targets: Vec::with_capacity(32),
140 choice_options: Vec::with_capacity(32),
141 priority_actions: Vec::with_capacity(16),
142 }
143 }
144}
145
/// Cache of the legal-action data computed for one decision, keyed by
/// `(decision_id, decision_kind, decision_player)`.
struct ActionCache {
    /// Action mask, `ACTION_SPACE_SIZE` entries, written by `fill_action_mask`.
    mask: Vec<u8>,
    /// Action-id -> action lookup table, `ACTION_SPACE_SIZE` entries.
    lookup: Vec<Option<ActionDesc>>,
    /// Legal actions the mask and lookup were built from.
    legal_actions: Vec<ActionDesc>,
    /// Id of the decision the cache was built for (0 after `clear`).
    decision_id: u32,
    /// Kind of that decision (`None` after `clear`, which prevents cache hits).
    decision_kind: Option<DecisionKind>,
    /// Player of that decision.
    decision_player: u8,
}
154
155impl ActionCache {
156 fn new() -> Self {
157 Self {
158 mask: vec![0u8; crate::encode::ACTION_SPACE_SIZE],
159 lookup: vec![None; crate::encode::ACTION_SPACE_SIZE],
160 legal_actions: Vec::new(),
161 decision_id: 0,
162 decision_kind: None,
163 decision_player: 0,
164 }
165 }
166
167 fn clear(&mut self) {
168 if self.mask.len() != crate::encode::ACTION_SPACE_SIZE {
169 self.mask.resize(crate::encode::ACTION_SPACE_SIZE, 0);
170 }
171 self.mask.fill(0);
172 if self.lookup.len() != crate::encode::ACTION_SPACE_SIZE {
173 self.lookup.resize(crate::encode::ACTION_SPACE_SIZE, None);
174 }
175 for slot in self.lookup.iter_mut() {
176 *slot = None;
177 }
178 self.legal_actions.clear();
179 self.decision_id = 0;
180 self.decision_kind = None;
181 self.decision_player = 0;
182 }
183
184 fn update(
185 &mut self,
186 state: &GameState,
187 decision: &Decision,
188 decision_id: u32,
189 db: &CardDb,
190 curriculum: &CurriculumConfig,
191 allowed_card_sets: Option<&std::collections::HashSet<String>>,
192 ) {
193 if self.decision_id == decision_id
194 && self.decision_kind == Some(decision.kind)
195 && self.decision_player == decision.player
196 {
197 return;
198 }
199 let actions =
200 crate::legal::legal_actions_cached(state, decision, db, curriculum, allowed_card_sets);
201 fill_action_mask(&actions, &mut self.mask, &mut self.lookup);
202 self.legal_actions = actions;
203 self.decision_id = decision_id;
204 self.decision_kind = Some(decision.kind);
205 self.decision_player = decision.player;
206 }
207}
208
/// Fixed-capacity ring buffer of replay events; once full, new events
/// overwrite the oldest ones.
struct EventRing {
    /// Maximum number of retained events; `0` makes `push` a no-op.
    capacity: usize,
    /// Backing storage; grows up to `capacity`, then is overwritten in place.
    events: Vec<ReplayEvent>,
    /// Once full: index of the oldest event, i.e. the next slot to overwrite.
    next: usize,
    /// Set when `events` has reached `capacity`.
    full: bool,
}
215
216impl EventRing {
217 fn new(capacity: usize) -> Self {
218 let mut events = Vec::with_capacity(capacity);
219 events.reserve(capacity);
220 Self {
221 capacity,
222 events,
223 next: 0,
224 full: false,
225 }
226 }
227
228 fn clear(&mut self) {
229 self.events.clear();
230 self.next = 0;
231 self.full = false;
232 }
233
234 fn push(&mut self, event: ReplayEvent) {
235 if self.capacity == 0 {
236 return;
237 }
238 if self.events.len() < self.capacity {
239 self.events.push(event);
240 if self.events.len() == self.capacity {
241 self.full = true;
242 self.next = 0;
243 }
244 } else {
245 self.events[self.next] = event;
246 self.next = (self.next + 1) % self.capacity;
247 }
248 }
249
250 fn len(&self) -> usize {
251 if self.capacity == 0 {
252 0
253 } else if self.full {
254 self.capacity
255 } else {
256 self.events.len()
257 }
258 }
259
260 fn snapshot_codes<F: Fn(&ReplayEvent) -> u32>(&self, out: &mut [u32], code_fn: F) -> usize {
261 let len = self.len();
262 if len == 0 {
263 for slot in out.iter_mut() {
264 *slot = 0;
265 }
266 return 0;
267 }
268 let cap = self.capacity;
269 for (i, slot) in out.iter_mut().enumerate() {
270 if i >= len {
271 *slot = 0;
272 continue;
273 }
274 let idx = if self.full { (self.next + i) % cap } else { i };
275 *slot = code_fn(&self.events[idx]);
276 }
277 len
278 }
279
280 fn snapshot_events(&self) -> Vec<ReplayEvent> {
281 let len = self.len();
282 if len == 0 {
283 return Vec::new();
284 }
285 let cap = self.capacity;
286 let mut out = Vec::with_capacity(len);
287 for i in 0..len {
288 let idx = if self.full { (self.next + i) % cap } else { i };
289 out.push(self.events[idx].clone());
290 }
291 out
292 }
293}
294
/// Maps an event to a stable, non-zero numeric code (1..=48), one per
/// `Event` variant.
///
/// Zero is never produced here; `EventRing::snapshot_codes` uses 0 to pad
/// unused output slots. The match is exhaustive (no wildcard arm), so a new
/// `Event` variant must be given a code before this compiles.
fn event_code(event: &ReplayEvent) -> u32 {
    // The arms match `Event` variants directly on a `&ReplayEvent`, so
    // `ReplayEvent` must name the same type as `Event` (presumably an alias).
    use crate::events::Event;
    match event {
        Event::Draw { .. } => 1,
        Event::Damage { .. } => 2,
        Event::DamageCancel { .. } => 3,
        Event::DamageIntent { .. } => 4,
        Event::DamageModifierApplied { .. } => 5,
        Event::DamageModified { .. } => 6,
        Event::DamageCommitted { .. } => 7,
        Event::ReversalCommitted { .. } => 8,
        Event::Reveal { .. } => 9,
        Event::TriggerQueued { .. } => 10,
        Event::TriggerGrouped { .. } => 11,
        Event::TriggerResolved { .. } => 12,
        Event::TriggerCanceled { .. } => 13,
        Event::TimingWindowEntered { .. } => 14,
        Event::PriorityGranted { .. } => 15,
        Event::PriorityPassed { .. } => 16,
        Event::StackGroupPresented { .. } => 17,
        Event::StackOrderChosen { .. } => 18,
        Event::StackPushed { .. } => 19,
        Event::StackResolved { .. } => 20,
        Event::AutoResolveCapExceeded { .. } => 21,
        Event::WindowAdvanced { .. } => 22,
        Event::ChoicePresented { .. } => 23,
        Event::ChoicePageChanged { .. } => 24,
        Event::ChoiceMade { .. } => 25,
        Event::ChoiceAutopicked { .. } => 26,
        Event::ChoiceSkipped { .. } => 27,
        Event::ZoneMove { .. } => 28,
        Event::ControlChanged { .. } => 29,
        Event::ModifierAdded { .. } => 30,
        Event::ModifierRemoved { .. } => 31,
        Event::Concede { .. } => 32,
        Event::Play { .. } => 33,
        Event::PlayEvent { .. } => 34,
        Event::PlayClimax { .. } => 35,
        Event::Trigger { .. } => 36,
        Event::Attack { .. } => 37,
        Event::AttackType { .. } => 38,
        Event::Counter { .. } => 39,
        Event::Clock { .. } => 40,
        Event::Shuffle { .. } => 41,
        Event::Refresh { .. } => 42,
        Event::RefreshPenalty { .. } => 43,
        Event::LevelUpChoice { .. } => 44,
        Event::Encore { .. } => 45,
        Event::Stand { .. } => 46,
        Event::EndTurn { .. } => 47,
        Event::Terminal { .. } => 48,
    }
}
348
/// Number of choice options addressable per page; tied to the encoder's
/// `CHOICE_COUNT` so choice indices always fit the action encoding.
const MAX_CHOICE_OPTIONS: usize = crate::encode::CHOICE_COUNT;
/// Iteration cap for stack auto-resolution (enforcement site not visible
/// here; presumably reported as `EngineErrorCode::StackAutoResolveCap`).
pub const STACK_AUTO_RESOLVE_CAP: u32 = 256;
/// Iteration cap for check-timing processing (enforcement site not visible
/// here; presumably reported as `EngineErrorCode::TriggerQuiescenceCap`).
pub const CHECK_TIMING_QUIESCENCE_CAP: u32 = 256;
/// Hand size limit (enforcement site not visible here).
pub const HAND_LIMIT: usize = 7;

// Numeric ids for trigger effects. They are not referenced in the visible
// part of this file; presumably consumed by the submodules — TODO confirm
// before renumbering.
const TRIGGER_EFFECT_SOUL: u8 = 0;
const TRIGGER_EFFECT_DRAW: u8 = 1;
const TRIGGER_EFFECT_SHOT: u8 = 2;
const TRIGGER_EFFECT_GATE: u8 = 3;
const TRIGGER_EFFECT_BOUNCE: u8 = 4;
const TRIGGER_EFFECT_STANDBY: u8 = 5;
const TRIGGER_EFFECT_TREASURE_STOCK: u8 = 6;
const TRIGGER_EFFECT_TREASURE_MOVE: u8 = 7;
362
/// Context for compiling a trigger effect.
///
/// NOTE(review): not used in the visible part of the file; field meanings
/// are inferred from names and should be confirmed at the use site.
#[derive(Clone, Copy, Debug)]
struct TriggerCompileContext {
    /// Card that produced the trigger.
    source_card: CardId,
    /// Stage slot chosen for a standby effect, when applicable.
    standby_slot: Option<u8>,
    /// Treasure decision, when applicable (presumably whether to take stock).
    treasure_take_stock: Option<bool>,
}
369
// Submodules holding the rest of the `GameEnv` implementation (their
// contents are not visible from this file).
mod interaction;
mod modifiers;
mod movement;
mod phases;
mod visibility;
375
376impl GameEnv {
377 fn validate_deck_lists(db: &CardDb, deck_lists: &[Vec<CardId>; 2]) {
378 for (player, deck) in deck_lists.iter().enumerate() {
379 assert!(
380 deck.len() == crate::encode::MAX_DECK,
381 "Deck {player} has {} cards (must be {})",
382 deck.len(),
383 crate::encode::MAX_DECK
384 );
385 let mut climax_count = 0usize;
386 let mut counts: std::collections::HashMap<CardId, usize> =
387 std::collections::HashMap::new();
388 for &card_id in deck {
389 let card = db
390 .get(card_id)
391 .unwrap_or_else(|| panic!("Deck {player} contains unknown card id {card_id}"));
392 if card.card_type == CardType::Climax {
393 climax_count += 1;
394 }
395 *counts.entry(card_id).or_insert(0) += 1;
396 }
397 assert!(
398 climax_count <= 8,
399 "Deck {player} has {climax_count} climax cards (max 8)"
400 );
401 for (card_id, count) in counts {
402 assert!(
403 count <= 4,
404 "Deck {player} has {count} copies of card {card_id} (max 4)"
405 );
406 }
407 }
408 }
409
410 pub fn add_modifier(
411 &mut self,
412 source: CardId,
413 target_player: u8,
414 target_slot: u8,
415 kind: ModifierKind,
416 magnitude: i32,
417 duration: ModifierDuration,
418 ) -> Option<u32> {
419 self.add_modifier_instance(
420 source,
421 None,
422 target_player,
423 target_slot,
424 kind,
425 magnitude,
426 duration,
427 crate::state::ModifierLayer::Effect,
428 )
429 }
430
    /// Flags that rule actions must be re-resolved; consumed by
    /// `run_rule_actions_if_needed`.
    pub(crate) fn mark_rule_actions_dirty(&mut self) {
        self.rule_actions_dirty = true;
    }
434
    /// Flags that continuous modifiers must be recomputed (the consumer of
    /// this flag is not visible in this part of the file).
    pub(crate) fn mark_continuous_modifiers_dirty(&mut self) {
        self.continuous_modifiers_dirty = true;
    }
438
    /// Builds an environment and immediately resets it, so the returned
    /// value is already at the first decision of episode 1.
    ///
    /// `seed` initialises the meta RNG (xor-ed with a fixed constant); each
    /// episode's own seed is then drawn from that RNG during reset.
    ///
    /// # Panics
    ///
    /// Panics if `config.deck_lists` fails `validate_deck_lists`.
    pub fn new(
        db: Arc<CardDb>,
        config: EnvConfig,
        curriculum: CurriculumConfig,
        seed: u64,
        replay_config: ReplayConfig,
        replay_writer: Option<ReplayWriter>,
        env_id: u32,
    ) -> Self {
        Self::validate_deck_lists(&db, &config.deck_lists);
        // NOTE(review): this placeholder state (and `episode_seed: seed`
        // below) only exists to fill the struct; `env.reset()` replaces both
        // with values derived from `meta_rng`.
        let starting_player = (seed as u8) & 1;
        let state = GameState::new(
            config.deck_lists[0].clone(),
            config.deck_lists[1].clone(),
            seed,
            starting_player,
        );
        // Derived caches must be rebuilt before the configs are stored.
        let mut curriculum = curriculum;
        curriculum.rebuild_cache();
        let mut replay_config = replay_config;
        replay_config.rebuild_cache();
        let mut env = Self {
            db,
            config,
            curriculum,
            state,
            env_id,
            episode_index: 0,
            decision: None,
            action_cache: ActionCache::new(),
            decision_id: 0,
            last_action_desc: None,
            last_action_player: None,
            last_illegal_action: false,
            last_engine_error: false,
            last_engine_error_code: EngineErrorCode::None,
            last_perspective: 0,
            pending_damage_delta: [0, 0],
            obs_buf: vec![0; OBS_LEN],
            slot_power_cache: [[0; crate::encode::MAX_STAGE]; 2],
            slot_power_dirty: [[true; crate::encode::MAX_STAGE]; 2],
            slot_power_cache_card: [[0; crate::encode::MAX_STAGE]; 2],
            slot_power_cache_mod_turn: [[0; crate::encode::MAX_STAGE]; 2],
            slot_power_cache_mod_battle: [[0; crate::encode::MAX_STAGE]; 2],
            rule_actions_dirty: true,
            continuous_modifiers_dirty: true,
            last_rule_action_phase: Phase::Stand,
            replay_config,
            replay_writer,
            replay_actions: Vec::new(),
            replay_events: Vec::new(),
            canonical_events: Vec::new(),
            replay_steps: Vec::new(),
            recording: false,
            meta_rng: Rng64::new(seed ^ 0xABCDEF1234567890),
            episode_seed: seed,
            scratch_replacement_indices: Vec::new(),
            scratch: EnvScratch::new(),
            revealed_to_viewer: std::array::from_fn(|_| BTreeSet::new()),
            debug: DebugConfig::default(),
            debug_event_ring: None,
        };
        // The outcome of this initial reset is discarded.
        env.reset();
        env
    }
504
    /// Starts a new episode and returns its first outcome, requesting that
    /// the observation be copied into the outcome (`copy_obs = true`).
    pub fn reset(&mut self) -> StepOutcome {
        self.reset_with_obs(true)
    }
508
    /// Same as `reset`, but with `copy_obs = false`, for callers that do not
    /// need the observation copied into the returned outcome.
    pub fn reset_no_copy(&mut self) -> StepOutcome {
        self.reset_with_obs(false)
    }
512
    /// Canonical events collected for the current episode (cleared on reset).
    pub fn canonical_events(&self) -> &[Event] {
        &self.canonical_events
    }
516
    /// Id of the current decision; bumped by `set_decision`, zeroed on reset.
    pub fn decision_id(&self) -> u32 {
        self.decision_id
    }
520
    /// Cached action mask over the whole action space.
    pub fn action_mask(&self) -> &[u8] {
        &self.action_cache.mask
    }
524
    /// Cached action-id -> action table; this is the table
    /// `apply_action_id` resolves ids against (`None` = no action for that id).
    pub fn action_lookup(&self) -> &[Option<ActionDesc>] {
        &self.action_cache.lookup
    }
528
    /// Cached list of legal actions the mask and lookup were built from.
    pub fn legal_actions(&self) -> &[ActionDesc] {
        &self.action_cache.legal_actions
    }
532
533 pub fn debug_event_ring_codes(&self, viewer: u8, out: &mut [u32]) -> u16 {
534 let Some(rings) = self.debug_event_ring.as_ref() else {
535 for slot in out.iter_mut() {
536 *slot = 0;
537 }
538 return 0;
539 };
540 let ring = &rings[viewer as usize % 2];
541 let count = ring.snapshot_codes(out, event_code);
542 count as u16
543 }
544
545 pub fn debug_event_ring_snapshot(&self, viewer: u8) -> Vec<ReplayEvent> {
546 let Some(rings) = self.debug_event_ring.as_ref() else {
547 return Vec::new();
548 };
549 rings[viewer as usize % 2].snapshot_events()
550 }
551
    /// Starts a new episode: draws a fresh episode seed, rebuilds the game
    /// state, clears all per-episode caches and buffers, deals the opening
    /// hands and advances to the first decision.
    ///
    /// `copy_obs` is forwarded to `build_outcome_with_obs`. The returned
    /// outcome carries a reward of `0.0`.
    ///
    /// The order of `meta_rng` draws (episode seed first, then the replay
    /// sampling draw when replays are enabled) determines the RNG stream
    /// across episodes and must not be reordered.
    ///
    /// # Panics
    ///
    /// Panics if the configured deck lists fail `validate_deck_lists`.
    fn reset_with_obs(&mut self, copy_obs: bool) -> StepOutcome {
        let episode_seed = self.meta_rng.next_u64();
        // The low bit of the episode seed picks the starting player.
        let starting_player = if (episode_seed & 1) == 1 { 1 } else { 0 };
        self.episode_seed = episode_seed;
        self.episode_index = self.episode_index.wrapping_add(1);
        // Re-validated on every reset because `config` is a public field and
        // may have been changed between episodes.
        Self::validate_deck_lists(&self.db, &self.config.deck_lists);
        self.state = GameState::new(
            self.config.deck_lists[0].clone(),
            self.config.deck_lists[1].clone(),
            episode_seed,
            starting_player,
        );
        // Invalidate every derived cache.
        self.slot_power_cache = [[0; crate::encode::MAX_STAGE]; 2];
        self.slot_power_dirty = [[true; crate::encode::MAX_STAGE]; 2];
        self.slot_power_cache_card = [[0; crate::encode::MAX_STAGE]; 2];
        self.slot_power_cache_mod_turn = [[0; crate::encode::MAX_STAGE]; 2];
        self.slot_power_cache_mod_battle = [[0; crate::encode::MAX_STAGE]; 2];
        self.rule_actions_dirty = true;
        self.continuous_modifiers_dirty = true;
        self.last_rule_action_phase = self.state.turn.phase;
        // Reset decision bookkeeping and last-step status.
        self.decision = None;
        self.action_cache.clear();
        self.decision_id = 0;
        self.last_action_desc = None;
        self.last_action_player = None;
        self.last_illegal_action = false;
        self.last_engine_error = false;
        self.last_engine_error_code = EngineErrorCode::None;
        self.last_perspective = self.state.turn.starting_player;
        self.pending_damage_delta = [0, 0];
        // `obs_buf` is public; restore its length if a caller changed it.
        if self.obs_buf.len() != OBS_LEN {
            self.obs_buf.resize(OBS_LEN, 0);
        }
        // Clear per-episode replay and visibility buffers.
        self.replay_actions.clear();
        self.replay_events.clear();
        self.canonical_events.clear();
        self.replay_steps.clear();
        for set in &mut self.revealed_to_viewer {
            set.clear();
        }
        if let Some(rings) = self.debug_event_ring.as_mut() {
            for ring in rings.iter_mut() {
                ring.clear();
            }
        }
        // Sample whether this episode is recorded. `&&` short-circuits, so
        // no RNG draw is consumed while replays are disabled.
        // NOTE(review): `<=` means a threshold of 0 still records when the
        // draw is exactly 0 — confirm this matches how `rebuild_cache`
        // derives `sample_threshold`.
        self.recording = self.replay_config.enabled
            && self.meta_rng.next_u32() <= self.replay_config.sample_threshold;
        self.scratch_replacement_indices.clear();

        // Opening setup: each player shuffles and draws five cards.
        for player in 0..2 {
            self.shuffle_deck(player as u8);
            self.draw_to_hand(player as u8, 5);
        }

        self.advance_until_decision();
        self.update_action_cache();
        self.maybe_validate_state("reset");
        self.build_outcome_with_obs(0.0, copy_obs)
    }
611
    /// Clears the illegal-action and engine-error status of the previous step.
    pub(crate) fn clear_status_flags(&mut self) {
        self.last_illegal_action = false;
        self.last_engine_error = false;
        self.last_engine_error_code = EngineErrorCode::None;
    }
617
    /// Resolves rule actions when they have been marked dirty or when the
    /// phase changed since the last call; otherwise does nothing.
    fn run_rule_actions_if_needed(&mut self) {
        // A phase change always forces a re-check.
        if self.state.turn.phase != self.last_rule_action_phase {
            self.rule_actions_dirty = true;
            self.last_rule_action_phase = self.state.turn.phase;
        }
        if !self.rule_actions_dirty {
            return;
        }
        self.rule_actions_dirty = false;
        self.resolve_rule_actions_until_stable();
        // Cleared again afterwards: any dirty marks raised while resolving
        // are discarded. NOTE(review): this relies on
        // `resolve_rule_actions_until_stable` really reaching a fixed point
        // — confirm.
        self.rule_actions_dirty = false;
    }
630
    /// Installs `decision` as the pending decision and bumps the decision id
    /// (wrapping), which invalidates the action cache key.
    pub(super) fn set_decision(&mut self, decision: Decision) {
        self.decision = Some(decision);
        self.decision_id = self.decision_id.wrapping_add(1);
    }
635
    /// Removes the pending decision; the decision id is left unchanged.
    pub(super) fn clear_decision(&mut self) {
        self.decision = None;
    }
639
640 pub fn set_debug_config(&mut self, debug: DebugConfig) {
641 self.debug = debug;
642 if debug.event_ring_capacity == 0 {
643 self.debug_event_ring = None;
644 } else {
645 self.debug_event_ring = Some(std::array::from_fn(|_| {
646 EventRing::new(debug.event_ring_capacity)
647 }));
648 }
649 }
650
    /// Applies the action registered under `action_id` in the cached lookup
    /// table, with `copy_obs = true`.
    ///
    /// # Errors
    ///
    /// Returns an error when no decision is pending; other failures follow
    /// `apply_action_id_internal`.
    pub fn apply_action_id(&mut self, action_id: usize) -> Result<StepOutcome> {
        self.apply_action_id_internal(action_id, true)
    }
654
    /// Same as `apply_action_id`, but with `copy_obs = false`.
    pub fn apply_action_id_no_copy(&mut self, action_id: usize) -> Result<StepOutcome> {
        self.apply_action_id_internal(action_id, false)
    }
658
659 fn apply_action_id_internal(
660 &mut self,
661 action_id: usize,
662 copy_obs: bool,
663 ) -> Result<StepOutcome> {
664 self.last_illegal_action = false;
665 self.last_engine_error = false;
666 self.last_engine_error_code = EngineErrorCode::None;
667 if self.decision.is_none() {
668 return Err(anyhow!("No pending decision"));
669 }
670 self.last_perspective = self.decision.as_ref().unwrap().player;
671 let action = match self
672 .action_cache
673 .lookup
674 .get(action_id)
675 .and_then(|a| a.clone())
676 {
677 Some(action) => action,
678 None => {
679 let player = self.decision.as_ref().unwrap().player;
680 return self.handle_illegal_action(player, "Invalid action id", copy_obs);
681 }
682 };
683 self.apply_action_internal(action, copy_obs)
684 }
685
    /// Applies an explicit action description, with `copy_obs = true`.
    ///
    /// NOTE(review): unlike `apply_action_id`, this entry point neither
    /// clears the previous step's status flags nor rejects a missing
    /// decision up front; a missing decision surfaces as an error from
    /// `apply_action_impl` and is then handled per the configured error
    /// policy. Confirm whether stale flags can leak into the outcome here.
    pub fn apply_action(&mut self, action: ActionDesc) -> Result<StepOutcome> {
        self.apply_action_internal(action, true)
    }
689
    /// Applies `action` for the pending decision, translating failures
    /// according to the configured `ErrorPolicy`, and records the step for
    /// replay when recording or state validation is active.
    ///
    /// When state validation is active, the action is first checked against
    /// a freshly computed legal-action list; an action outside that list is
    /// routed to `handle_illegal_action`, and that early return skips the
    /// replay logging at the end of this function.
    ///
    /// # Errors
    ///
    /// Under `ErrorPolicy::Strict` the error from `apply_action_impl` is
    /// returned as-is. Under the lenient policies the error is converted
    /// into an `Ok` outcome with the engine-error flags set.
    fn apply_action_internal(&mut self, action: ActionDesc, copy_obs: bool) -> Result<StepOutcome> {
        // Without a pending decision, fall back to the last perspective so
        // error handling below still has an actor to attribute the step to.
        let acting_player = self
            .decision
            .as_ref()
            .map(|d| d.player)
            .unwrap_or(self.last_perspective);
        self.last_perspective = acting_player;
        self.pending_damage_delta = [0, 0];
        // Captured before the action runs, because applying it may replace
        // the decision. Defaults to `Main` when no decision is pending.
        let decision_kind = self
            .decision
            .as_ref()
            .map(|d| d.kind)
            .unwrap_or(DecisionKind::Main);
        // Kept for validation and replay logging; `action` itself is moved
        // into `apply_action_impl`.
        let action_clone = action.clone();
        if self.should_validate_state() {
            if let Some(decision) = &self.decision {
                let legal = crate::legal::legal_actions_cached(
                    &self.state,
                    decision,
                    &self.db,
                    &self.curriculum,
                    self.curriculum.allowed_card_sets_cache.as_ref(),
                );
                if !legal.contains(&action_clone) {
                    return self.handle_illegal_action(
                        decision.player,
                        "Action not in legal set",
                        copy_obs,
                    );
                }
            }
        }
        let outcome = match self.apply_action_impl(action, copy_obs) {
            Ok(outcome) => Ok(outcome),
            Err(err) => match self.config.error_policy {
                ErrorPolicy::Strict => Err(err),
                ErrorPolicy::LenientTerminate => {
                    // End the game with the acting player as the loser.
                    self.last_engine_error = true;
                    self.last_engine_error_code = EngineErrorCode::ActionError;
                    self.last_perspective = acting_player;
                    self.state.terminal = Some(TerminalResult::Win {
                        winner: 1 - acting_player,
                    });
                    self.decision = None;
                    self.update_action_cache();
                    Ok(self
                        .build_outcome_with_obs(self.terminal_reward_for(acting_player), copy_obs))
                }
                ErrorPolicy::LenientNoop => {
                    // Report the error with zero reward; no rollback is
                    // attempted here.
                    self.last_engine_error = true;
                    self.last_engine_error_code = EngineErrorCode::ActionError;
                    self.last_perspective = acting_player;
                    self.update_action_cache();
                    Ok(self.build_outcome_with_obs(0.0, copy_obs))
                }
            },
        }?;
        // Reached for successful steps and for lenient-policy errors.
        if self.recording || self.should_validate_state() {
            self.log_action(acting_player, action_clone);
            self.replay_steps.push(StepMeta {
                actor: acting_player,
                decision_kind,
                illegal_action: self.last_illegal_action,
                engine_error: self.last_engine_error,
            });
        }
        Ok(outcome)
    }
758
759 fn apply_action_impl(&mut self, action: ActionDesc, copy_obs: bool) -> Result<StepOutcome> {
760 let decision = self
761 .decision
762 .clone()
763 .ok_or_else(|| anyhow!("No decision to apply"))?;
764 self.last_perspective = decision.player;
765 self.last_action_desc = Some(action.clone());
766 self.last_action_player = Some(decision.player);
767
768 let mut reward = 0.0f32;
769
770 if action == ActionDesc::Concede {
771 self.log_event(Event::Concede {
772 player: decision.player,
773 });
774 self.state.terminal = Some(TerminalResult::Win {
775 winner: 1 - decision.player,
776 });
777 self.decision = None;
778 self.state.turn.decision_count += 1;
779 self.update_action_cache();
780 self.maybe_validate_state("post_concede");
781 reward += self.compute_reward(decision.player, &self.pending_damage_delta);
782 return Ok(self.build_outcome_with_obs(reward, copy_obs));
783 }
784
785 match decision.kind {
786 DecisionKind::Mulligan => match action {
787 ActionDesc::MulliganSelect { hand_index } => {
788 let p = decision.player as usize;
789 let hi = hand_index as usize;
790 if hi >= self.state.players[p].hand.len() {
791 return self.handle_illegal_action(
792 decision.player,
793 "Mulligan hand index out of range",
794 copy_obs,
795 );
796 }
797 if hi >= crate::encode::MAX_HAND {
798 return self.handle_illegal_action(
799 decision.player,
800 "Mulligan hand index exceeds encoding",
801 copy_obs,
802 );
803 }
804 let bit = 1u64 << hi;
805 let current = &mut self.state.turn.mulligan_selected[p];
806 if *current & bit != 0 {
807 *current &= !bit;
808 } else {
809 *current |= bit;
810 }
811 }
812 ActionDesc::MulliganConfirm => {
813 let p = decision.player as usize;
814 let hand_len = self.state.players[p].hand.len();
815 let mut indices: Vec<usize> = Vec::new();
816 let mask = self.state.turn.mulligan_selected[p];
817 for idx in 0..hand_len.min(crate::encode::MAX_HAND) {
818 if mask & (1u64 << idx) != 0 {
819 indices.push(idx);
820 }
821 }
822 indices.sort_by(|a, b| b.cmp(a));
823 for idx in indices.iter().copied() {
824 if idx >= self.state.players[p].hand.len() {
825 continue;
826 }
827 let card = self.state.players[p].hand.remove(idx);
828 let from_slot = if idx <= u8::MAX as usize {
829 Some(idx as u8)
830 } else {
831 None
832 };
833 self.move_card_between_zones(
834 p as u8,
835 card,
836 Zone::Hand,
837 Zone::WaitingRoom,
838 from_slot,
839 None,
840 );
841 }
842 let draw_count = indices.len();
843 if draw_count > 0 {
844 self.draw_to_hand(p as u8, draw_count);
845 }
846 self.state.turn.mulligan_done[p] = true;
847 self.state.turn.mulligan_selected[p] = 0;
848 }
849 _ => {
850 return self.handle_illegal_action(
851 decision.player,
852 "Invalid mulligan action",
853 copy_obs,
854 )
855 }
856 },
857 DecisionKind::Clock => {
858 match action {
859 ActionDesc::Pass => {
860 self.log_event(Event::Clock {
861 player: decision.player,
862 card: None,
863 });
864 }
865 ActionDesc::Clock { hand_index } => {
866 let p = decision.player as usize;
867 let hi = hand_index as usize;
868 if hi >= self.state.players[p].hand.len() {
869 return self.handle_illegal_action(
870 decision.player,
871 "Clock hand index out of range",
872 copy_obs,
873 );
874 }
875 let card = self.state.players[p].hand.remove(hi);
876 let card_id = card.id;
877 self.move_card_between_zones(
878 decision.player,
879 card,
880 Zone::Hand,
881 Zone::Clock,
882 Some(hand_index),
883 None,
884 );
885 self.log_event(Event::Clock {
886 player: decision.player,
887 card: Some(card_id),
888 });
889 self.draw_to_hand(decision.player, 2);
890 self.check_level_up(decision.player);
891 }
892 _ => {
893 return self.handle_illegal_action(
894 decision.player,
895 "Invalid clock action",
896 copy_obs,
897 )
898 }
899 }
900 self.state.turn.phase_step = 2;
901 }
902 DecisionKind::Main => match action {
903 ActionDesc::Pass => {
904 if self.curriculum.enable_priority_windows {
905 self.state.turn.main_passed = true;
906 if self.state.turn.priority.is_none() {
907 self.enter_timing_window(TimingWindow::MainWindow, decision.player);
908 }
909 } else {
910 self.state.turn.main_passed = false;
911 self.state.turn.phase = Phase::Climax;
912 self.state.turn.phase_step = 0;
913 }
914 }
915 ActionDesc::MainPlayCharacter {
916 hand_index,
917 stage_slot,
918 } => {
919 if let Err(err) = self.play_character(decision.player, hand_index, stage_slot) {
920 return self.handle_illegal_action(
921 decision.player,
922 &err.to_string(),
923 copy_obs,
924 );
925 }
926 }
927 ActionDesc::MainPlayEvent { hand_index } => {
928 if let Err(err) = self.play_event(decision.player, hand_index) {
929 return self.handle_illegal_action(
930 decision.player,
931 &err.to_string(),
932 copy_obs,
933 );
934 }
935 }
936 ActionDesc::MainMove { from_slot, to_slot } => {
937 let p = decision.player as usize;
938 let fs = from_slot as usize;
939 let ts = to_slot as usize;
940 if fs >= self.state.players[p].stage.len()
941 || ts >= self.state.players[p].stage.len()
942 || fs == ts
943 {
944 return self.handle_illegal_action(
945 decision.player,
946 "Invalid move slots",
947 copy_obs,
948 );
949 }
950 if self.state.players[p].stage[fs].card.is_none() {
951 return self.handle_illegal_action(
952 decision.player,
953 "Move requires a source slot with a card",
954 copy_obs,
955 );
956 }
957 self.state.players[p].stage.swap(fs, ts);
958 self.remove_modifiers_for_slot(decision.player, from_slot);
959 self.remove_modifiers_for_slot(decision.player, to_slot);
960 self.mark_slot_power_dirty(decision.player, from_slot);
961 self.mark_slot_power_dirty(decision.player, to_slot);
962 self.mark_rule_actions_dirty();
963 self.mark_continuous_modifiers_dirty();
964 }
965 ActionDesc::MainActivateAbility {
966 slot,
967 ability_index,
968 } => {
969 let _ = (slot, ability_index);
970 return self.handle_illegal_action(
971 decision.player,
972 "Activated abilities only via priority window",
973 copy_obs,
974 );
975 }
976 _ => {
977 return self.handle_illegal_action(
978 decision.player,
979 "Invalid main action",
980 copy_obs,
981 )
982 }
983 },
984 DecisionKind::Climax => match action {
985 ActionDesc::Pass => {
986 self.state.turn.phase_step = 2;
987 if self.curriculum.enable_priority_windows {
988 self.enter_timing_window(TimingWindow::ClimaxWindow, decision.player);
989 }
990 }
991 ActionDesc::ClimaxPlay { hand_index } => {
992 if let Err(err) = self.play_climax(decision.player, hand_index) {
993 return self.handle_illegal_action(
994 decision.player,
995 &err.to_string(),
996 copy_obs,
997 );
998 }
999 self.state.turn.phase_step = 2;
1000 if self.curriculum.enable_priority_windows {
1001 self.enter_timing_window(TimingWindow::ClimaxWindow, decision.player);
1002 }
1003 }
1004 _ => {
1005 return self.handle_illegal_action(
1006 decision.player,
1007 "Invalid climax action",
1008 copy_obs,
1009 )
1010 }
1011 },
1012 DecisionKind::AttackDeclaration => match action {
1013 ActionDesc::Pass => {
1014 if self.curriculum.enable_encore {
1015 self.queue_encore_requests();
1016 } else {
1017 self.cleanup_reversed_to_waiting_room();
1018 }
1019 self.state.turn.phase = Phase::End;
1020 self.state.turn.phase_step = 0;
1021 self.state.turn.attack_phase_begin_done = false;
1022 self.state.turn.attack_decl_check_done = false;
1023 }
1024 ActionDesc::Attack { slot, attack_type } => {
1025 if let Err(err) = self.declare_attack(decision.player, slot, attack_type) {
1026 return self.handle_illegal_action(
1027 decision.player,
1028 &err.to_string(),
1029 copy_obs,
1030 );
1031 }
1032 }
1033 _ => {
1034 return self.handle_illegal_action(
1035 decision.player,
1036 "Invalid attack action",
1037 copy_obs,
1038 )
1039 }
1040 },
1041 DecisionKind::LevelUp => match action {
1042 ActionDesc::LevelUp { index } => {
1043 if self.state.turn.pending_level_up != Some(decision.player) {
1044 return self.handle_illegal_action(
1045 decision.player,
1046 "No pending level up",
1047 copy_obs,
1048 );
1049 }
1050 if let Err(err) = self.resolve_level_up(decision.player, index) {
1051 return self.handle_illegal_action(
1052 decision.player,
1053 &err.to_string(),
1054 copy_obs,
1055 );
1056 }
1057 }
1058 _ => {
1059 return self.handle_illegal_action(
1060 decision.player,
1061 "Invalid level up action",
1062 copy_obs,
1063 )
1064 }
1065 },
1066 DecisionKind::Encore => match action {
1067 ActionDesc::EncorePay { slot } => {
1068 if let Err(err) = self.resolve_encore(decision.player, slot, true) {
1069 return self.handle_illegal_action(
1070 decision.player,
1071 &err.to_string(),
1072 copy_obs,
1073 );
1074 }
1075 }
1076 ActionDesc::EncoreDecline { slot } => {
1077 if let Err(err) = self.resolve_encore(decision.player, slot, false) {
1078 return self.handle_illegal_action(
1079 decision.player,
1080 &err.to_string(),
1081 copy_obs,
1082 );
1083 }
1084 }
1085 _ => {
1086 return self.handle_illegal_action(
1087 decision.player,
1088 "Invalid encore action",
1089 copy_obs,
1090 )
1091 }
1092 },
1093 DecisionKind::TriggerOrder => {
1094 let Some(order) = self.state.turn.trigger_order.clone() else {
1095 return self.handle_illegal_action(
1096 decision.player,
1097 "No trigger order pending",
1098 copy_obs,
1099 );
1100 };
1101 if order.player != decision.player {
1102 return self.handle_illegal_action(
1103 decision.player,
1104 "Trigger order player mismatch",
1105 copy_obs,
1106 );
1107 }
1108 match action {
1109 ActionDesc::TriggerOrder { index } => {
1110 let idx = index as usize;
1111 if idx >= order.choices.len() {
1112 return self.handle_illegal_action(
1113 decision.player,
1114 "Trigger order index out of range",
1115 copy_obs,
1116 );
1117 }
1118 let trigger_id = order.choices[idx];
1119 let trigger_index = self
1120 .state
1121 .turn
1122 .pending_triggers
1123 .iter()
1124 .position(|t| t.id == trigger_id);
1125 let Some(trigger_index) = trigger_index else {
1126 return self.handle_illegal_action(
1127 decision.player,
1128 "Trigger already resolved",
1129 copy_obs,
1130 );
1131 };
1132 let trigger = self.state.turn.pending_triggers.remove(trigger_index);
1133 let _ = self.resolve_trigger(trigger);
1134 self.state.turn.trigger_order = None;
1135 }
1136 _ => {
1137 return self.handle_illegal_action(
1138 decision.player,
1139 "Invalid trigger order action",
1140 copy_obs,
1141 )
1142 }
1143 }
1144 }
1145 DecisionKind::Choice => {
1146 let Some(choice_ref) = self.state.turn.choice.as_ref() else {
1147 return self.handle_illegal_action(
1148 decision.player,
1149 "No choice pending",
1150 copy_obs,
1151 );
1152 };
1153 if choice_ref.player != decision.player {
1154 return self.handle_illegal_action(
1155 decision.player,
1156 "Choice player mismatch",
1157 copy_obs,
1158 );
1159 }
1160 match action {
1161 ActionDesc::ChoiceSelect { index } => {
1162 let Some(choice) = self.state.turn.choice.take() else {
1163 return self.handle_illegal_action(
1164 decision.player,
1165 "No choice pending",
1166 copy_obs,
1167 );
1168 };
1169 let idx = index as usize;
1170 if idx >= MAX_CHOICE_OPTIONS {
1171 return self.handle_illegal_action(
1172 decision.player,
1173 "Choice index out of range",
1174 copy_obs,
1175 );
1176 }
1177 let total = choice.total_candidates as usize;
1178 let page_start = choice.page_start as usize;
1179 let global_idx = page_start + idx;
1180 if global_idx >= total {
1181 return self.handle_illegal_action(
1182 decision.player,
1183 "Choice index out of range",
1184 copy_obs,
1185 );
1186 }
1187 let Some(option) = choice.options.get(global_idx).copied() else {
1188 return self.handle_illegal_action(
1189 decision.player,
1190 "Choice option missing",
1191 copy_obs,
1192 );
1193 };
1194 if self.recording {
1195 self.log_event(Event::ChoiceMade {
1196 choice_id: choice.id,
1197 player: decision.player,
1198 reason: choice.reason,
1199 option,
1200 });
1201 }
1202 self.recycle_choice_options(choice.options);
1203 self.apply_choice_effect(
1204 choice.reason,
1205 choice.player,
1206 option,
1207 choice.pending_trigger,
1208 );
1209 }
1210 ActionDesc::ChoicePrevPage | ActionDesc::ChoiceNextPage => {
1211 let nav = {
1212 let Some(choice) = self.state.turn.choice.as_mut() else {
1213 return self.handle_illegal_action(
1214 decision.player,
1215 "No choice pending",
1216 copy_obs,
1217 );
1218 };
1219 let total = choice.total_candidates as usize;
1220 let page_size = MAX_CHOICE_OPTIONS;
1221 let current = choice.page_start as usize;
1222 let new_start = match action {
1223 ActionDesc::ChoicePrevPage => {
1224 if current < page_size {
1225 None
1226 } else {
1227 Some(current - page_size)
1228 }
1229 }
1230 ActionDesc::ChoiceNextPage => {
1231 if current + page_size >= total {
1232 None
1233 } else {
1234 Some(current + page_size)
1235 }
1236 }
1237 _ => None,
1238 };
1239 if let Some(new_start) = new_start {
1240 let from_start = choice.page_start;
1241 choice.page_start = new_start as u16;
1242 Some((choice.id, choice.player, from_start, choice.page_start))
1243 } else {
1244 None
1245 }
1246 };
1247 let Some((choice_id, player, from_start, to_start)) = nav else {
1248 return self.handle_illegal_action(
1249 decision.player,
1250 "Choice page out of range",
1251 copy_obs,
1252 );
1253 };
1254 if self.recording {
1255 self.log_event(Event::ChoicePageChanged {
1256 choice_id,
1257 player,
1258 from_start,
1259 to_start,
1260 });
1261 }
1262 }
1263 _ => {
1264 return self.handle_illegal_action(
1265 decision.player,
1266 "Invalid choice action",
1267 copy_obs,
1268 )
1269 }
1270 }
1271 }
1272 }
1273
1274 self.decision = None;
1275 self.state.turn.decision_count += 1;
1276 if self.state.turn.decision_count >= self.config.max_decisions {
1277 self.state.terminal = Some(TerminalResult::Timeout);
1278 }
1279
1280 self.advance_until_decision();
1281 self.update_action_cache();
1282 self.maybe_validate_state("post_action");
1283
1284 reward += self.compute_reward(decision.player, &self.pending_damage_delta);
1285 Ok(self.build_outcome_with_obs(reward, copy_obs))
1286 }
1287
1288 fn compute_reward(&self, perspective: u8, damage_delta: &[i32; 2]) -> f32 {
1289 let RewardConfig {
1290 terminal_win,
1291 terminal_loss,
1292 terminal_draw,
1293 enable_shaping,
1294 damage_reward,
1295 } = &self.config.reward;
1296 if let Some(term) = self.state.terminal {
1297 return match term {
1298 TerminalResult::Win { winner } => {
1299 if winner == perspective {
1300 *terminal_win
1301 } else {
1302 *terminal_loss
1303 }
1304 }
1305 TerminalResult::Draw | TerminalResult::Timeout => *terminal_draw,
1306 };
1307 }
1308 if *enable_shaping {
1309 let mut reward = 0.0;
1310 let p = perspective as usize;
1311 let opp = 1 - p;
1312 reward += *damage_reward * damage_delta[opp] as f32;
1313 reward -= *damage_reward * damage_delta[p] as f32;
1314 return reward;
1315 }
1316 0.0
1317 }
1318
/// Repeatedly settles rule actions, triggers, priority windows and (when
/// priority windows are disabled) the stack, until a decision is pending,
/// the game is terminal, or nothing more can be resolved automatically.
///
/// This loop itself never increments `tick_count` and never switches the
/// turn phase; it only drains work that is already queued.
///
/// If more than `CHECK_TIMING_QUIESCENCE_CAP` stack items are auto-resolved
/// in one call, the episode is ended with `TerminalResult::Timeout` and an
/// engine error is recorded.
fn resolve_quiescence_until_decision(&mut self) {
    // Number of stack items auto-resolved during this call.
    let mut auto_resolve_steps: u32 = 0;
    loop {
        // Nothing to do once the game is over or an agent must act.
        if self.state.terminal.is_some() || self.decision.is_some() {
            return;
        }
        self.run_rule_actions_if_needed();
        self.refresh_continuous_modifiers_if_needed();
        // A pending level-up is surfaced as a decision before anything else.
        if let Some(player) = self.state.turn.pending_level_up {
            self.set_decision(Decision {
                player,
                kind: DecisionKind::LevelUp,
                focus_slot: None,
            });
            return;
        }
        // The trigger pipeline reported progress: stop if it produced a
        // decision, otherwise start the next pass from the top.
        if self.handle_trigger_pipeline() {
            if self.decision.is_some() {
                return;
            }
            continue;
        }
        // Same protocol for the priority window handler.
        if self.handle_priority_window() {
            if self.decision.is_some() {
                return;
            }
            continue;
        }
        // With priority windows disabled, a non-empty stack with no pending
        // priority, choice or stack-order prompt is resolved automatically,
        // one item (popped from the end of the stack) per pass.
        if !self.curriculum.enable_priority_windows
            && self.state.turn.priority.is_none()
            && self.state.turn.choice.is_none()
            && self.state.turn.stack_order.is_none()
            && !self.state.turn.stack.is_empty()
        {
            auto_resolve_steps = auto_resolve_steps.saturating_add(1);
            if auto_resolve_steps > CHECK_TIMING_QUIESCENCE_CAP {
                // Runaway resolution: log it, flag an engine error and end
                // the episode as a timeout.
                // NOTE(review): this reports `TriggerQuiescenceCap` while
                // `advance_until_decision` reports `StackAutoResolveCap`
                // for the analogous condition — confirm this is intended.
                self.log_event(Event::AutoResolveCapExceeded {
                    cap: CHECK_TIMING_QUIESCENCE_CAP,
                    stack_len: self.state.turn.stack.len() as u32,
                    window: self.state.turn.active_window,
                });
                self.last_engine_error = true;
                self.last_engine_error_code = EngineErrorCode::TriggerQuiescenceCap;
                self.state.terminal = Some(TerminalResult::Timeout);
                return;
            }
            if let Some(item) = self.state.turn.stack.pop() {
                self.resolve_stack_item(&item);
                self.log_event(Event::StackResolved { item });
                continue;
            }
        }
        // No handler made progress: the state is quiescent.
        break;
    }
}
1374
1375 pub(crate) fn update_action_cache(&mut self) {
1376 if self.decision.is_some() {
1377 let decision_kind = self
1378 .decision
1379 .as_ref()
1380 .map(|d| d.kind)
1381 .expect("decision kind");
1382 if decision_kind == DecisionKind::AttackDeclaration
1383 && self.state.turn.derived_attack.is_none()
1384 {
1385 self.recompute_derived_attack();
1386 }
1387 let decision = self.decision.as_ref().expect("decision present");
1388 self.last_perspective = decision.player;
1389 self.action_cache.update(
1390 &self.state,
1391 decision,
1392 self.decision_id,
1393 &self.db,
1394 &self.curriculum,
1395 self.curriculum.allowed_card_sets_cache.as_ref(),
1396 );
1397 } else {
1398 self.action_cache.clear();
1399 }
1400 }
1401
1402 fn should_validate_state(&self) -> bool {
1403 if cfg!(debug_assertions) {
1404 return true;
1405 }
1406 std::env::var("WEISS_VALIDATE_STATE").ok().as_deref() == Some("1")
1407 }
1408
1409 fn maybe_validate_state(&self, context: &str) {
1410 if !self.should_validate_state() {
1411 return;
1412 }
1413 if let Err(err) = self.validate_state() {
1414 panic!("validate_state failed at {context}: {err}");
1415 }
1416 }
1417
1418 pub fn validate_state(&self) -> Result<()> {
1419 use std::collections::{HashMap, HashSet};
1420 let mut errors = Vec::new();
1421
1422 let mut counts: [HashMap<CardId, i32>; 2] = [HashMap::new(), HashMap::new()];
1423 for (owner, owner_counts) in counts.iter_mut().enumerate() {
1424 let deck_list = &self.config.deck_lists[owner];
1425 for card in deck_list.iter().copied() {
1426 *owner_counts.entry(card).or_insert(0) += 1;
1427 }
1428 }
1429
1430 fn consume(
1431 counts: &mut [HashMap<CardId, i32>; 2],
1432 errors: &mut Vec<String>,
1433 owner: u8,
1434 card: CardId,
1435 zone: &str,
1436 ) {
1437 let owner_idx = owner as usize;
1438 let entry = counts[owner_idx].entry(card).or_insert(0);
1439 *entry -= 1;
1440 if *entry < 0 {
1441 errors.push(format!("Owner {owner} has extra card {card} in {zone}"));
1442 }
1443 }
1444
1445 let mut instance_ids: HashSet<CardInstanceId> = HashSet::new();
1446 fn check_instance(
1447 instance_ids: &mut HashSet<CardInstanceId>,
1448 errors: &mut Vec<String>,
1449 card: &CardInstance,
1450 zone: &str,
1451 ) {
1452 if card.instance_id == 0 {
1453 errors.push(format!("Card instance id 0 in {zone}"));
1454 return;
1455 }
1456 if !instance_ids.insert(card.instance_id) {
1457 errors.push(format!(
1458 "Duplicate instance id {} in {zone}",
1459 card.instance_id
1460 ));
1461 }
1462 }
1463
1464 for zone_player in 0..2 {
1465 let p = &self.state.players[zone_player];
1466 for card in &p.deck {
1467 consume(
1468 &mut counts,
1469 &mut errors,
1470 card.owner,
1471 card.id,
1472 &format!("p{zone_player} deck"),
1473 );
1474 check_instance(
1475 &mut instance_ids,
1476 &mut errors,
1477 card,
1478 &format!("p{zone_player} deck"),
1479 );
1480 }
1481 for card in &p.hand {
1482 consume(
1483 &mut counts,
1484 &mut errors,
1485 card.owner,
1486 card.id,
1487 &format!("p{zone_player} hand"),
1488 );
1489 check_instance(
1490 &mut instance_ids,
1491 &mut errors,
1492 card,
1493 &format!("p{zone_player} hand"),
1494 );
1495 }
1496 for card in &p.waiting_room {
1497 consume(
1498 &mut counts,
1499 &mut errors,
1500 card.owner,
1501 card.id,
1502 &format!("p{zone_player} waiting_room"),
1503 );
1504 check_instance(
1505 &mut instance_ids,
1506 &mut errors,
1507 card,
1508 &format!("p{zone_player} waiting_room"),
1509 );
1510 }
1511 for card in &p.clock {
1512 consume(
1513 &mut counts,
1514 &mut errors,
1515 card.owner,
1516 card.id,
1517 &format!("p{zone_player} clock"),
1518 );
1519 check_instance(
1520 &mut instance_ids,
1521 &mut errors,
1522 card,
1523 &format!("p{zone_player} clock"),
1524 );
1525 }
1526 for card in &p.level {
1527 consume(
1528 &mut counts,
1529 &mut errors,
1530 card.owner,
1531 card.id,
1532 &format!("p{zone_player} level"),
1533 );
1534 check_instance(
1535 &mut instance_ids,
1536 &mut errors,
1537 card,
1538 &format!("p{zone_player} level"),
1539 );
1540 }
1541 for card in &p.stock {
1542 consume(
1543 &mut counts,
1544 &mut errors,
1545 card.owner,
1546 card.id,
1547 &format!("p{zone_player} stock"),
1548 );
1549 check_instance(
1550 &mut instance_ids,
1551 &mut errors,
1552 card,
1553 &format!("p{zone_player} stock"),
1554 );
1555 }
1556 for card in &p.memory {
1557 consume(
1558 &mut counts,
1559 &mut errors,
1560 card.owner,
1561 card.id,
1562 &format!("p{zone_player} memory"),
1563 );
1564 check_instance(
1565 &mut instance_ids,
1566 &mut errors,
1567 card,
1568 &format!("p{zone_player} memory"),
1569 );
1570 }
1571 for card in &p.climax {
1572 consume(
1573 &mut counts,
1574 &mut errors,
1575 card.owner,
1576 card.id,
1577 &format!("p{zone_player} climax"),
1578 );
1579 check_instance(
1580 &mut instance_ids,
1581 &mut errors,
1582 card,
1583 &format!("p{zone_player} climax"),
1584 );
1585 }
1586 for card in &p.resolution {
1587 consume(
1588 &mut counts,
1589 &mut errors,
1590 card.owner,
1591 card.id,
1592 &format!("p{zone_player} resolution"),
1593 );
1594 check_instance(
1595 &mut instance_ids,
1596 &mut errors,
1597 card,
1598 &format!("p{zone_player} resolution"),
1599 );
1600 }
1601 for (slot_idx, slot) in p.stage.iter().enumerate() {
1602 if let Some(card) = slot.card {
1603 consume(
1604 &mut counts,
1605 &mut errors,
1606 card.owner,
1607 card.id,
1608 &format!("p{zone_player} stage[{slot_idx}]"),
1609 );
1610 check_instance(
1611 &mut instance_ids,
1612 &mut errors,
1613 &card,
1614 &format!("p{zone_player} stage[{slot_idx}]"),
1615 );
1616 }
1617 }
1618 }
1619
1620 for (owner, owner_counts) in counts.iter().enumerate() {
1621 for (card, remaining) in owner_counts.iter() {
1622 if *remaining != 0 {
1623 errors.push(format!(
1624 "Owner {owner} card {card} count mismatch ({remaining})"
1625 ));
1626 }
1627 }
1628 }
1629
1630 if let Some(decision) = &self.decision {
1631 if let Some(slot) = decision.focus_slot {
1632 if slot as usize >= self.state.players[decision.player as usize].stage.len() {
1633 errors.push("Decision focus slot out of range".to_string());
1634 }
1635 }
1636 match decision.kind {
1637 DecisionKind::AttackDeclaration => {
1638 if self.state.turn.attack.is_some() {
1639 errors.push("Attack declaration while attack context active".to_string());
1640 }
1641 }
1642 DecisionKind::LevelUp => {
1643 if self.state.turn.pending_level_up.is_none() {
1644 errors.push("Level up decision without pending level".to_string());
1645 }
1646 }
1647 DecisionKind::Encore => {
1648 let has = self
1649 .state
1650 .turn
1651 .encore_queue
1652 .iter()
1653 .any(|r| r.player == decision.player);
1654 if !has {
1655 errors.push("Encore decision without reversed options".to_string());
1656 }
1657 }
1658 DecisionKind::TriggerOrder => {
1659 if self.state.turn.trigger_order.is_none() {
1660 errors.push("Trigger order decision without pending order".to_string());
1661 }
1662 }
1663 DecisionKind::Choice => {
1664 if let Some(choice) = &self.state.turn.choice {
1665 if choice.player != decision.player {
1666 errors.push("Choice decision player mismatch".to_string());
1667 }
1668 } else {
1669 errors.push("Choice decision without pending choice".to_string());
1670 }
1671 }
1672 _ => {}
1673 }
1674 }
1675
1676 if self.state.turn.attack.is_some() && self.state.turn.phase != Phase::Attack {
1677 errors.push("Attack context outside Attack phase".to_string());
1678 }
1679
1680 if errors.is_empty() {
1681 return Ok(());
1682 }
1683
1684 let state_hash = crate::fingerprint::state_fingerprint(&self.state);
1685 let phase = self.state.turn.phase;
1686 let attack_step = self.state.turn.attack.as_ref().map(|c| c.step);
1687 let tail_len = 8usize;
1688 let actions_tail: Vec<String> = self
1689 .replay_actions
1690 .iter()
1691 .rev()
1692 .take(tail_len)
1693 .rev()
1694 .map(|a| format!("{a:?}"))
1695 .collect();
1696 let decisions_tail: Vec<String> = self
1697 .replay_steps
1698 .iter()
1699 .rev()
1700 .take(tail_len)
1701 .rev()
1702 .map(|s| format!("{:?}/{:?}", s.decision_kind, s.actor))
1703 .collect();
1704 let fallback_action = self
1705 .last_action_desc
1706 .as_ref()
1707 .map(|a| format!("{a:?}"))
1708 .unwrap_or_else(|| "None".to_string());
1709 let payload = format!(
1710 "seed={}\nphase={:?}\nattack_step={:?}\nlast_action={}\nactions_tail={:?}\ndecisions_tail={:?}\nstate_hash={}",
1711 self.episode_seed,
1712 phase,
1713 attack_step,
1714 fallback_action,
1715 actions_tail,
1716 decisions_tail,
1717 state_hash,
1718 );
1719 Err(anyhow!("{}\n{}", payload, errors.join("; ")))
1720 }
1721
1722 pub(crate) fn build_outcome_no_copy(&mut self, reward: f32) -> StepOutcome {
1723 self.build_outcome_with_obs(reward, false)
1724 }
1725
1726 fn build_outcome_with_obs(&mut self, reward: f32, copy_obs: bool) -> StepOutcome {
1727 let perspective = self
1728 .decision
1729 .as_ref()
1730 .map(|d| d.player)
1731 .unwrap_or(self.last_perspective);
1732 self.refresh_slot_power_cache();
1733 encode_observation_with_slot_power(
1734 &self.state,
1735 &self.db,
1736 &self.curriculum,
1737 perspective,
1738 self.decision.as_ref(),
1739 self.last_action_desc.as_ref(),
1740 self.last_action_player,
1741 self.config.observation_visibility,
1742 &self.slot_power_cache,
1743 &mut self.obs_buf,
1744 );
1745 let obs = if copy_obs {
1746 self.obs_buf.clone()
1747 } else {
1748 Vec::new()
1749 };
1750 let info = EnvInfo {
1751 obs_version: OBS_ENCODING_VERSION,
1752 action_version: ACTION_ENCODING_VERSION,
1753 decision_kind: self
1754 .decision
1755 .as_ref()
1756 .map(|d| match d.kind {
1757 DecisionKind::Mulligan => 0,
1758 DecisionKind::Clock => 1,
1759 DecisionKind::Main => 2,
1760 DecisionKind::Climax => 3,
1761 DecisionKind::AttackDeclaration => 4,
1762 DecisionKind::LevelUp => 5,
1763 DecisionKind::Encore => 6,
1764 DecisionKind::TriggerOrder => 7,
1765 DecisionKind::Choice => 8,
1766 })
1767 .unwrap_or(-1),
1768 current_player: self.decision.as_ref().map(|d| d.player as i8).unwrap_or(-1),
1769 actor: self.last_perspective as i8,
1770 decision_count: self.state.turn.decision_count,
1771 tick_count: self.state.turn.tick_count,
1772 terminal: self.state.terminal,
1773 illegal_action: self.last_illegal_action,
1774 engine_error: self.last_engine_error,
1775 engine_error_code: self.last_engine_error_code as u8,
1776 };
1777 let truncated = matches!(self.state.terminal, Some(TerminalResult::Timeout));
1778 let terminated = matches!(
1779 self.state.terminal,
1780 Some(TerminalResult::Win { .. } | TerminalResult::Draw)
1781 );
1782 StepOutcome {
1783 obs,
1784 reward,
1785 terminated,
1786 truncated,
1787 info,
1788 }
1789 }
1790
/// Drives the engine forward until an agent decision is pending, the game
/// becomes terminal, or the loop is explicitly left (encore timing window).
///
/// Each loop iteration, in order:
/// 1. stops if the game is terminal;
/// 2. resolves pending losses, rule actions and continuous modifiers;
/// 3. stops if a decision is pending;
/// 4. enforces the `max_ticks` budget (ending the episode as `Timeout`) and
///    increments `tick_count`;
/// 5. surfaces a pending level-up as a `LevelUp` decision;
/// 6. gives the trigger pipeline and then the priority window a chance to
///    make progress;
/// 7. auto-resolves one stack item when priority windows are disabled;
/// 8. cleans up pending resolution cards when nothing else is outstanding;
/// 9. services the encore queue;
/// 10. otherwise advances the phase state machine
///     (Mulligan → Stand → Draw → Clock → Main → Climax → Attack → End).
///
/// If more than `STACK_AUTO_RESOLVE_CAP` stack items are auto-resolved in
/// one call, an engine error (`StackAutoResolveCap`) is recorded and the
/// episode ends as `Timeout`.
pub(crate) fn advance_until_decision(&mut self) {
    // Number of stack items auto-resolved during this call.
    let mut auto_resolve_steps: u32 = 0;
    loop {
        if self.state.terminal.is_some() {
            break;
        }
        self.resolve_pending_losses();
        self.run_rule_actions_if_needed();
        self.refresh_continuous_modifiers_if_needed();
        if self.decision.is_some() {
            break;
        }
        // Tick budget: every iteration that gets this far costs one tick.
        if self.state.turn.tick_count >= self.config.max_ticks {
            self.state.terminal = Some(TerminalResult::Timeout);
            break;
        }
        self.state.turn.tick_count += 1;

        // A pending level-up is surfaced before any other processing.
        if let Some(player) = self.state.turn.pending_level_up {
            self.set_decision(Decision {
                player,
                kind: DecisionKind::LevelUp,
                focus_slot: None,
            });
            break;
        }

        // The trigger pipeline reported progress: stop if it produced a
        // decision, otherwise restart the iteration.
        if self.handle_trigger_pipeline() {
            if self.decision.is_some() {
                break;
            }
            continue;
        }

        // Same protocol for the priority window handler.
        if self.handle_priority_window() {
            if self.decision.is_some() {
                break;
            }
            continue;
        }
        // With priority windows disabled, a non-empty stack with no pending
        // priority, choice or stack-order prompt is resolved automatically,
        // one item (popped from the end of the stack) per iteration.
        if !self.curriculum.enable_priority_windows
            && self.state.turn.priority.is_none()
            && self.state.turn.choice.is_none()
            && self.state.turn.stack_order.is_none()
            && !self.state.turn.stack.is_empty()
        {
            auto_resolve_steps = auto_resolve_steps.saturating_add(1);
            if auto_resolve_steps > STACK_AUTO_RESOLVE_CAP {
                // Runaway resolution: log it, flag an engine error and end
                // the episode as a timeout.
                self.log_event(Event::AutoResolveCapExceeded {
                    cap: STACK_AUTO_RESOLVE_CAP,
                    stack_len: self.state.turn.stack.len() as u32,
                    window: self.state.turn.active_window,
                });
                self.last_engine_error = true;
                self.last_engine_error_code = EngineErrorCode::StackAutoResolveCap;
                self.state.terminal = Some(TerminalResult::Timeout);
                break;
            }
            if let Some(item) = self.state.turn.stack.pop() {
                self.resolve_stack_item(&item);
                self.log_event(Event::StackResolved { item });
                continue;
            }
        }

        // Only when nothing at all is outstanding are pending resolution
        // cards cleaned up.
        if self.state.turn.stack.is_empty()
            && self.state.turn.pending_triggers.is_empty()
            && self.state.turn.choice.is_none()
            && self.state.turn.priority.is_none()
            && self.state.turn.stack_order.is_none()
        {
            self.cleanup_pending_resolution_cards();
        }

        // Encore handling takes precedence over the phase state machine
        // while the encore queue is non-empty.
        if !self.state.turn.encore_queue.is_empty() {
            // One-time check timing at the start of the encore step.
            if !self.state.turn.encore_begin_done {
                self.run_check_timing(crate::db::AbilityTiming::BeginEncoreStep);
                self.state.turn.encore_begin_done = true;
                continue;
            }
            // One-time encore timing window (only with priority windows).
            // NOTE(review): this `break` leaves the loop without setting a
            // decision here; presumably `enter_timing_window` (or an
            // already-active priority) provides one — confirm.
            if self.curriculum.enable_priority_windows && !self.state.turn.encore_window_done {
                self.state.turn.encore_window_done = true;
                if self.state.turn.priority.is_none() {
                    self.enter_timing_window(
                        TimingWindow::EncoreWindow,
                        self.state.turn.active_player,
                    );
                }
                break;
            }
            // The active player handles encore first.
            if self.state.turn.encore_step_player.is_none() {
                self.state.turn.encore_step_player = Some(self.state.turn.active_player);
            }
            let current = self.state.turn.encore_step_player.unwrap();
            let has_current = self
                .state
                .turn
                .encore_queue
                .iter()
                .any(|r| r.player == current);
            // Stay on the current player while they still have queue
            // entries; otherwise hand over to the other player if they have
            // any, or clear the encore step player.
            let next_player = if has_current {
                Some(current)
            } else {
                let other = 1 - current;
                if self
                    .state
                    .turn
                    .encore_queue
                    .iter()
                    .any(|r| r.player == other)
                {
                    self.state.turn.encore_step_player = Some(other);
                    Some(other)
                } else {
                    self.state.turn.encore_step_player = None;
                    None
                }
            };
            if let Some(player) = next_player {
                self.set_decision(Decision {
                    player,
                    kind: DecisionKind::Encore,
                    focus_slot: None,
                });
                break;
            }
        }

        // Phase state machine. `phase_step` sequences the sub-steps inside
        // a phase: begin-timing check, the phase action or decision, the
        // after-timing check, then the transition to the next phase.
        match self.state.turn.phase {
            Phase::Mulligan => {
                // Both players done: the starting player begins the game.
                if self.state.turn.mulligan_done[0] && self.state.turn.mulligan_done[1] {
                    self.state.turn.phase = Phase::Stand;
                    self.state.turn.phase_step = 0;
                    self.state.turn.active_player = self.state.turn.starting_player;
                    continue;
                }
                // The starting player mulligans first, then the other.
                let sp = self.state.turn.starting_player as usize;
                let next = if !self.state.turn.mulligan_done[sp] {
                    sp
                } else {
                    1 - sp
                };
                self.set_decision(Decision {
                    player: next as u8,
                    kind: DecisionKind::Mulligan,
                    focus_slot: None,
                });
                break;
            }
            Phase::Stand => {
                let p = self.state.turn.active_player;
                match self.state.turn.phase_step {
                    0 => {
                        self.run_check_timing(crate::db::AbilityTiming::BeginTurn);
                        // Let level-ups/triggers raised by the begin-turn
                        // check resolve before the stand-phase check runs.
                        // NOTE(review): `phase_step` is still 0 here, so the
                        // BeginTurn check runs again on re-entry — confirm
                        // `run_check_timing` tolerates that.
                        if self.state.turn.pending_level_up.is_some()
                            || !self.state.turn.pending_triggers.is_empty()
                        {
                            continue;
                        }
                        self.run_check_timing(crate::db::AbilityTiming::BeginStandPhase);
                        self.state.turn.phase_step = 1;
                        continue;
                    }
                    1 => {
                        self.resolve_stand_phase(p);
                        self.state.turn.phase_step = 2;
                        continue;
                    }
                    2 => {
                        self.run_check_timing(crate::db::AbilityTiming::AfterStandPhase);
                        self.state.turn.phase_step = 3;
                        continue;
                    }
                    _ => {
                        // Wait for outstanding level-ups/triggers before
                        // leaving the phase.
                        if self.state.turn.pending_level_up.is_some()
                            || !self.state.turn.pending_triggers.is_empty()
                        {
                            continue;
                        }
                        self.state.turn.phase = Phase::Draw;
                        self.state.turn.phase_step = 0;
                        continue;
                    }
                }
            }
            Phase::Draw => {
                let p = self.state.turn.active_player;
                match self.state.turn.phase_step {
                    0 => {
                        self.run_check_timing(crate::db::AbilityTiming::BeginDrawPhase);
                        self.state.turn.phase_step = 1;
                        continue;
                    }
                    1 => {
                        // The active player draws one card.
                        self.draw_to_hand(p, 1);
                        self.state.turn.phase_step = 2;
                        continue;
                    }
                    2 => {
                        self.run_check_timing(crate::db::AbilityTiming::AfterDrawPhase);
                        self.state.turn.phase_step = 3;
                        continue;
                    }
                    _ => {
                        if self.state.turn.pending_level_up.is_some()
                            || !self.state.turn.pending_triggers.is_empty()
                        {
                            continue;
                        }
                        // The clock phase is skipped when the curriculum
                        // disables it.
                        self.state.turn.phase = if self.curriculum.enable_clock_phase {
                            Phase::Clock
                        } else {
                            Phase::Main
                        };
                        self.state.turn.phase_step = 0;
                        continue;
                    }
                }
            }
            Phase::Clock => {
                // Skip straight to Main if the clock phase is disabled.
                if !self.curriculum.enable_clock_phase {
                    self.state.turn.phase = Phase::Main;
                    self.state.turn.phase_step = 0;
                    continue;
                }
                let p = self.state.turn.active_player;
                match self.state.turn.phase_step {
                    0 => {
                        self.run_check_timing(crate::db::AbilityTiming::BeginClockPhase);
                        self.state.turn.phase_step = 1;
                        continue;
                    }
                    1 => {
                        // The step is not advanced here; presumably the
                        // Clock action handler moves `phase_step` on.
                        self.set_decision(Decision {
                            player: p,
                            kind: DecisionKind::Clock,
                            focus_slot: None,
                        });
                        break;
                    }
                    2 => {
                        self.run_check_timing(crate::db::AbilityTiming::AfterClockPhase);
                        self.state.turn.phase_step = 3;
                        continue;
                    }
                    _ => {
                        if self.state.turn.pending_level_up.is_some()
                            || !self.state.turn.pending_triggers.is_empty()
                        {
                            continue;
                        }
                        self.state.turn.phase = Phase::Main;
                        self.state.turn.phase_step = 0;
                        continue;
                    }
                }
            }
            Phase::Main => {
                let p = self.state.turn.active_player;
                // Begin-of-phase check runs once, then the player decides.
                if self.state.turn.phase_step == 0 {
                    self.run_check_timing(crate::db::AbilityTiming::BeginMainPhase);
                    self.state.turn.phase_step = 1;
                    continue;
                }
                self.set_decision(Decision {
                    player: p,
                    kind: DecisionKind::Main,
                    focus_slot: None,
                });
                break;
            }
            Phase::Climax => {
                // Skip straight to Attack if the climax phase is disabled,
                // resetting the attack-phase bookkeeping flags.
                if !self.curriculum.enable_climax_phase {
                    self.state.turn.phase = Phase::Attack;
                    self.state.turn.phase_step = 0;
                    self.state.turn.attack_phase_begin_done = false;
                    self.state.turn.attack_decl_check_done = false;
                    continue;
                }
                let p = self.state.turn.active_player;
                match self.state.turn.phase_step {
                    0 => {
                        self.run_check_timing(crate::db::AbilityTiming::BeginClimaxPhase);
                        self.state.turn.phase_step = 1;
                        continue;
                    }
                    1 => {
                        self.set_decision(Decision {
                            player: p,
                            kind: DecisionKind::Climax,
                            focus_slot: None,
                        });
                        break;
                    }
                    2 => {
                        self.run_check_timing(crate::db::AbilityTiming::AfterClimaxPhase);
                        self.state.turn.phase_step = 3;
                        continue;
                    }
                    _ => {
                        if self.state.turn.pending_level_up.is_some()
                            || !self.state.turn.pending_triggers.is_empty()
                        {
                            continue;
                        }
                        self.state.turn.phase = Phase::Attack;
                        self.state.turn.phase_step = 0;
                        self.state.turn.attack_phase_begin_done = false;
                        self.state.turn.attack_decl_check_done = false;
                        continue;
                    }
                }
            }
            Phase::Attack => {
                // One-time check timing at the start of the attack phase.
                if !self.state.turn.attack_phase_begin_done {
                    self.run_check_timing(crate::db::AbilityTiming::BeginAttackPhase);
                    self.state.turn.attack_phase_begin_done = true;
                    continue;
                }
                // No attack in progress: run the declaration-step check
                // once, then ask the active player to declare.
                if self.state.turn.attack.is_none() {
                    if !self.state.turn.attack_decl_check_done {
                        self.run_check_timing(
                            crate::db::AbilityTiming::BeginAttackDeclarationStep,
                        );
                        self.state.turn.attack_decl_check_done = true;
                        continue;
                    }
                    let p = self.state.turn.active_player;
                    self.recompute_derived_attack();
                    self.set_decision(Decision {
                        player: p,
                        kind: DecisionKind::AttackDeclaration,
                        focus_slot: None,
                    });
                    break;
                }
                // An attack is in progress: advance its pipeline.
                self.resolve_attack_pipeline();
            }
            Phase::End => {
                let p = self.state.turn.active_player;
                // When the end phase reports completion, pass the turn to
                // the other player.
                if self.resolve_end_phase(p) {
                    self.state.turn.active_player = 1 - p;
                    self.state.turn.phase = Phase::Stand;
                    self.state.turn.phase_step = 0;
                }
            }
        }
        // Reached only by arms that fall through (Attack with an attack in
        // progress, End); every other path uses `continue` or `break`.
        self.maybe_validate_state("advance_loop");
    }
}
2141
2142 fn card_set_allowed(&self, card: &CardStatic) -> bool {
2143 match (&self.curriculum.allowed_card_sets_cache, &card.card_set) {
2144 (None, _) => true,
2145 (Some(set), Some(set_id)) => set.contains(set_id),
2146 (Some(_), None) => false,
2147 }
2148 }
2149
2150 fn handle_illegal_action(
2151 &mut self,
2152 acting_player: u8,
2153 reason: &str,
2154 copy_obs: bool,
2155 ) -> Result<StepOutcome> {
2156 self.last_illegal_action = true;
2157 self.last_perspective = acting_player;
2158 match self.config.error_policy {
2159 ErrorPolicy::Strict => Err(anyhow!("Illegal action: {reason}")),
2160 ErrorPolicy::LenientTerminate => {
2161 let winner = 1 - acting_player;
2162 self.state.terminal = Some(TerminalResult::Win { winner });
2163 self.decision = None;
2164 self.update_action_cache();
2165 Ok(self.build_outcome_with_obs(self.terminal_reward_for(acting_player), copy_obs))
2166 }
2167 ErrorPolicy::LenientNoop => {
2168 self.update_action_cache();
2169 Ok(self.build_outcome_with_obs(0.0, copy_obs))
2170 }
2171 }
2172 }
2173
2174 pub(crate) fn terminal_reward_for(&self, perspective: u8) -> f32 {
2175 let RewardConfig {
2176 terminal_win,
2177 terminal_loss,
2178 terminal_draw,
2179 ..
2180 } = &self.config.reward;
2181 match self.state.terminal {
2182 Some(TerminalResult::Win { winner }) => {
2183 if winner == perspective {
2184 *terminal_win
2185 } else {
2186 *terminal_loss
2187 }
2188 }
2189 Some(TerminalResult::Draw | TerminalResult::Timeout) => *terminal_draw,
2190 None => 0.0,
2191 }
2192 }
2193}
2194
2195#[cfg(test)]
2196mod tests;