weiss_core/
env.rs

1use anyhow::{anyhow, Result};
2use std::collections::BTreeSet;
3use std::sync::Arc;
4
5use crate::config::{
6    CurriculumConfig, EnvConfig, ErrorPolicy, ObservationVisibility, RewardConfig,
7};
8use crate::db::{CardDb, CardId, CardStatic, CardType};
9use crate::encode::{
10    encode_observation_with_slot_power, fill_action_mask, ACTION_ENCODING_VERSION,
11    OBS_ENCODING_VERSION, OBS_LEN,
12};
13use crate::events::{Event, Zone};
14use crate::legal::{ActionDesc, Decision, DecisionKind};
15use crate::replay::{ReplayConfig, ReplayEvent, ReplayWriter, StepMeta};
16use crate::state::{
17    CardInstance, CardInstanceId, ChoiceOptionRef, DamageType, GameState, ModifierDuration,
18    ModifierKind, Phase, TargetRef, TerminalResult, TimingWindow,
19};
20use crate::util::Rng64;
21
/// Metadata describing the current environment state for Python info payloads.
///
/// All fields are plain values so the struct can be cloned freely.
#[derive(Clone, Debug)]
pub struct EnvInfo {
    /// Observation encoding version (presumably `OBS_ENCODING_VERSION` — confirm at the builder).
    pub obs_version: u32,
    /// Action encoding version (presumably `ACTION_ENCODING_VERSION` — confirm at the builder).
    pub action_version: u32,
    /// Numeric code of the pending decision kind; signed, so a negative value can
    /// presumably mean "no decision" — TODO confirm.
    pub decision_kind: i8,
    /// Player associated with the current state; signed, presumably to allow a "none" sentinel.
    pub current_player: i8,
    /// Player that acted; signed, presumably to allow a "none" sentinel.
    pub actor: i8,
    /// Number of decisions taken so far in the episode.
    pub decision_count: u32,
    /// Number of engine ticks so far in the episode.
    pub tick_count: u32,
    /// Terminal result, if the game has ended.
    pub terminal: Option<TerminalResult>,
    /// Whether the last submitted action was flagged as illegal.
    pub illegal_action: bool,
    /// Whether the last step hit an engine error.
    pub engine_error: bool,
    /// Numeric engine error code (presumably an `EngineErrorCode` discriminant — confirm).
    pub engine_error_code: u8,
}
37
/// Outcome from applying a single decision action.
///
/// Also returned by the reset entry points to describe the first state of an episode.
#[derive(Clone, Debug)]
pub struct StepOutcome {
    /// Encoded observation vector.
    pub obs: Vec<i32>,
    /// Reward assigned for this step.
    pub reward: f32,
    /// Whether the episode has terminated.
    pub terminated: bool,
    /// Whether the episode was truncated.
    pub truncated: bool,
    /// Additional metadata about the environment state.
    pub info: EnvInfo,
}
47
48#[derive(Clone, Copy, Debug)]
49struct VisibilityContext {
50    viewer: Option<u8>,
51    mode: ObservationVisibility,
52    policies_enabled: bool,
53}
54
55impl VisibilityContext {
56    fn is_public(self) -> bool {
57        self.policies_enabled && self.mode == ObservationVisibility::Public
58    }
59}
60
/// Classification of engine-level failures recorded on the environment.
///
/// The explicit discriminants are small integers, so the value fits the `u8`
/// error-code field carried by `EnvInfo`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum EngineErrorCode {
    /// No engine error occurred.
    None = 0,
    /// Presumably raised when stack auto-resolution exceeds `STACK_AUTO_RESOLVE_CAP` — confirm.
    StackAutoResolveCap = 1,
    /// Presumably raised when check timing exceeds `CHECK_TIMING_QUIESCENCE_CAP` — confirm.
    TriggerQuiescenceCap = 2,
    /// Presumably recorded when a panic was caught around the engine — confirm.
    Panic = 3,
    /// Recorded when applying an action returns an error under a lenient error policy.
    ActionError = 4,
}
69
/// Optional debugging knobs; the `Default` value has both fields set to zero.
#[derive(Clone, Copy, Debug, Default)]
pub struct DebugConfig {
    /// Fingerprint cadence (presumably "every N steps", with 0 meaning off — TODO confirm).
    pub fingerprint_every_n: u32,
    /// Capacity of each per-viewer debug event ring; 0 disables the rings entirely.
    pub event_ring_capacity: usize,
}
75
/// A single Weiss Schwarz environment instance with deterministic RNG state.
pub struct GameEnv {
    /// Shared card database.
    pub db: Arc<CardDb>,
    /// Static environment configuration (deck lists, error policy, ...).
    pub config: EnvConfig,
    /// Curriculum switches that restrict or enable rules.
    pub curriculum: CurriculumConfig,
    /// Full game state of the current episode; rebuilt on every reset.
    pub state: GameState,
    /// Identifier of this environment supplied at construction.
    pub env_id: u32,
    /// Episode counter; incremented (wrapping) on every reset.
    pub episode_index: u32,
    /// The decision currently awaiting an action, if any.
    pub decision: Option<Decision>,
    /// Cached mask / lookup / legal-action list for the pending decision.
    action_cache: ActionCache,
    /// Counter bumped (wrapping) each time a new decision is set.
    decision_id: u32,
    /// Last action that was applied.
    pub last_action_desc: Option<ActionDesc>,
    /// Player that applied `last_action_desc`.
    pub last_action_player: Option<u8>,
    /// Status flag: the last submitted action was illegal.
    pub last_illegal_action: bool,
    /// Status flag: the last step hit an engine error.
    pub last_engine_error: bool,
    /// Detail code accompanying `last_engine_error`.
    pub last_engine_error_code: EngineErrorCode,
    /// Player whose perspective was used most recently.
    pub last_perspective: u8,
    /// Per-player damage delta accumulated during the current step; zeroed before each action.
    pub pending_damage_delta: [i32; 2],
    /// Reusable observation buffer of length `OBS_LEN`.
    pub obs_buf: Vec<i32>,
    // Per-player, per-stage-slot power cache and the inputs it was computed from.
    slot_power_cache: [[i32; crate::encode::MAX_STAGE]; 2],
    slot_power_dirty: [[bool; crate::encode::MAX_STAGE]; 2],
    slot_power_cache_card: [[CardId; crate::encode::MAX_STAGE]; 2],
    slot_power_cache_mod_turn: [[i32; crate::encode::MAX_STAGE]; 2],
    slot_power_cache_mod_battle: [[i32; crate::encode::MAX_STAGE]; 2],
    /// Set when rule actions need to be re-resolved.
    rule_actions_dirty: bool,
    /// Set when continuous modifiers need to be recomputed.
    continuous_modifiers_dirty: bool,
    /// Phase observed the last time rule actions were considered.
    last_rule_action_phase: Phase,
    /// Replay sampling/recording configuration.
    pub replay_config: ReplayConfig,
    /// Optional sink for recorded replays.
    pub replay_writer: Option<ReplayWriter>,
    /// Actions recorded for the current episode.
    pub replay_actions: Vec<ActionDesc>,
    /// Events recorded for the current episode.
    pub replay_events: Vec<ReplayEvent>,
    /// Canonical event log of the current episode; cleared on reset.
    canonical_events: Vec<Event>,
    /// Per-step metadata recorded for the current episode.
    pub replay_steps: Vec<StepMeta>,
    /// Whether the current episode is being recorded; re-decided on every reset.
    pub recording: bool,
    /// RNG that produces per-episode seeds and the replay sampling draw.
    pub meta_rng: Rng64,
    /// Seed of the current episode.
    pub episode_seed: u64,
    /// Reusable scratch list of indices; cleared on reset.
    pub scratch_replacement_indices: Vec<usize>,
    /// Reusable scratch vectors.
    scratch: EnvScratch,
    /// Per-viewer set of card instances revealed to that viewer; cleared on reset.
    revealed_to_viewer: [BTreeSet<CardInstanceId>; 2],
    /// Active debug configuration.
    debug: DebugConfig,
    /// Per-viewer debug event rings; `None` when the configured capacity is 0.
    debug_event_ring: Option<[EventRing; 2]>,
}
118
/// Local, copyable description of a pending damage instance.
// NOTE(review): field meanings below are inferred from names only — confirm against the damage pipeline.
#[derive(Clone, Copy, Debug)]
struct DamageIntentLocal {
    /// Player dealing the damage.
    source_player: u8,
    /// Stage slot of the source, when there is one.
    source_slot: Option<u8>,
    /// Player receiving the damage.
    target: u8,
    /// Amount of damage.
    amount: i32,
    /// Category of damage.
    damage_type: DamageType,
    /// Whether the damage can be canceled.
    cancelable: bool,
    /// Whether this damage is a refresh penalty.
    refresh_penalty: bool,
}
129
130struct EnvScratch {
131    targets: Vec<TargetRef>,
132    choice_options: Vec<ChoiceOptionRef>,
133    priority_actions: Vec<ActionDesc>,
134}
135
136impl EnvScratch {
137    fn new() -> Self {
138        Self {
139            targets: Vec::with_capacity(32),
140            choice_options: Vec::with_capacity(32),
141            priority_actions: Vec::with_capacity(16),
142        }
143    }
144}
145
146struct ActionCache {
147    mask: Vec<u8>,
148    lookup: Vec<Option<ActionDesc>>,
149    legal_actions: Vec<ActionDesc>,
150    decision_id: u32,
151    decision_kind: Option<DecisionKind>,
152    decision_player: u8,
153}
154
155impl ActionCache {
156    fn new() -> Self {
157        Self {
158            mask: vec![0u8; crate::encode::ACTION_SPACE_SIZE],
159            lookup: vec![None; crate::encode::ACTION_SPACE_SIZE],
160            legal_actions: Vec::new(),
161            decision_id: 0,
162            decision_kind: None,
163            decision_player: 0,
164        }
165    }
166
167    fn clear(&mut self) {
168        if self.mask.len() != crate::encode::ACTION_SPACE_SIZE {
169            self.mask.resize(crate::encode::ACTION_SPACE_SIZE, 0);
170        }
171        self.mask.fill(0);
172        if self.lookup.len() != crate::encode::ACTION_SPACE_SIZE {
173            self.lookup.resize(crate::encode::ACTION_SPACE_SIZE, None);
174        }
175        for slot in self.lookup.iter_mut() {
176            *slot = None;
177        }
178        self.legal_actions.clear();
179        self.decision_id = 0;
180        self.decision_kind = None;
181        self.decision_player = 0;
182    }
183
184    fn update(
185        &mut self,
186        state: &GameState,
187        decision: &Decision,
188        decision_id: u32,
189        db: &CardDb,
190        curriculum: &CurriculumConfig,
191        allowed_card_sets: Option<&std::collections::HashSet<String>>,
192    ) {
193        if self.decision_id == decision_id
194            && self.decision_kind == Some(decision.kind)
195            && self.decision_player == decision.player
196        {
197            return;
198        }
199        let actions =
200            crate::legal::legal_actions_cached(state, decision, db, curriculum, allowed_card_sets);
201        fill_action_mask(&actions, &mut self.mask, &mut self.lookup);
202        self.legal_actions = actions;
203        self.decision_id = decision_id;
204        self.decision_kind = Some(decision.kind);
205        self.decision_player = decision.player;
206    }
207}
208
209struct EventRing {
210    capacity: usize,
211    events: Vec<ReplayEvent>,
212    next: usize,
213    full: bool,
214}
215
216impl EventRing {
217    fn new(capacity: usize) -> Self {
218        let mut events = Vec::with_capacity(capacity);
219        events.reserve(capacity);
220        Self {
221            capacity,
222            events,
223            next: 0,
224            full: false,
225        }
226    }
227
228    fn clear(&mut self) {
229        self.events.clear();
230        self.next = 0;
231        self.full = false;
232    }
233
234    fn push(&mut self, event: ReplayEvent) {
235        if self.capacity == 0 {
236            return;
237        }
238        if self.events.len() < self.capacity {
239            self.events.push(event);
240            if self.events.len() == self.capacity {
241                self.full = true;
242                self.next = 0;
243            }
244        } else {
245            self.events[self.next] = event;
246            self.next = (self.next + 1) % self.capacity;
247        }
248    }
249
250    fn len(&self) -> usize {
251        if self.capacity == 0 {
252            0
253        } else if self.full {
254            self.capacity
255        } else {
256            self.events.len()
257        }
258    }
259
260    fn snapshot_codes<F: Fn(&ReplayEvent) -> u32>(&self, out: &mut [u32], code_fn: F) -> usize {
261        let len = self.len();
262        if len == 0 {
263            for slot in out.iter_mut() {
264                *slot = 0;
265            }
266            return 0;
267        }
268        let cap = self.capacity;
269        for (i, slot) in out.iter_mut().enumerate() {
270            if i >= len {
271                *slot = 0;
272                continue;
273            }
274            let idx = if self.full { (self.next + i) % cap } else { i };
275            *slot = code_fn(&self.events[idx]);
276        }
277        len
278    }
279
280    fn snapshot_events(&self) -> Vec<ReplayEvent> {
281        let len = self.len();
282        if len == 0 {
283            return Vec::new();
284        }
285        let cap = self.capacity;
286        let mut out = Vec::with_capacity(len);
287        for i in 0..len {
288            let idx = if self.full { (self.next + i) % cap } else { i };
289            out.push(self.events[idx].clone());
290        }
291        out
292    }
293}
294
/// Maps an event to a fixed, non-zero numeric code (1..=48), one per `Event` variant.
///
/// Zero is never returned; the debug event ring writes 0 into unused output slots,
/// so a code of 0 can be read as "no event". The match has no catch-all arm, so
/// adding an `Event` variant without assigning a code here fails to compile.
fn event_code(event: &ReplayEvent) -> u32 {
    use crate::events::Event;
    match event {
        Event::Draw { .. } => 1,
        Event::Damage { .. } => 2,
        Event::DamageCancel { .. } => 3,
        Event::DamageIntent { .. } => 4,
        Event::DamageModifierApplied { .. } => 5,
        Event::DamageModified { .. } => 6,
        Event::DamageCommitted { .. } => 7,
        Event::ReversalCommitted { .. } => 8,
        Event::Reveal { .. } => 9,
        Event::TriggerQueued { .. } => 10,
        Event::TriggerGrouped { .. } => 11,
        Event::TriggerResolved { .. } => 12,
        Event::TriggerCanceled { .. } => 13,
        Event::TimingWindowEntered { .. } => 14,
        Event::PriorityGranted { .. } => 15,
        Event::PriorityPassed { .. } => 16,
        Event::StackGroupPresented { .. } => 17,
        Event::StackOrderChosen { .. } => 18,
        Event::StackPushed { .. } => 19,
        Event::StackResolved { .. } => 20,
        Event::AutoResolveCapExceeded { .. } => 21,
        Event::WindowAdvanced { .. } => 22,
        Event::ChoicePresented { .. } => 23,
        Event::ChoicePageChanged { .. } => 24,
        Event::ChoiceMade { .. } => 25,
        Event::ChoiceAutopicked { .. } => 26,
        Event::ChoiceSkipped { .. } => 27,
        Event::ZoneMove { .. } => 28,
        Event::ControlChanged { .. } => 29,
        Event::ModifierAdded { .. } => 30,
        Event::ModifierRemoved { .. } => 31,
        Event::Concede { .. } => 32,
        Event::Play { .. } => 33,
        Event::PlayEvent { .. } => 34,
        Event::PlayClimax { .. } => 35,
        Event::Trigger { .. } => 36,
        Event::Attack { .. } => 37,
        Event::AttackType { .. } => 38,
        Event::Counter { .. } => 39,
        Event::Clock { .. } => 40,
        Event::Shuffle { .. } => 41,
        Event::Refresh { .. } => 42,
        Event::RefreshPenalty { .. } => 43,
        Event::LevelUpChoice { .. } => 44,
        Event::Encore { .. } => 45,
        Event::Stand { .. } => 46,
        Event::EndTurn { .. } => 47,
        Event::Terminal { .. } => 48,
    }
}
348
/// Number of choice options, mirroring the encoder's `CHOICE_COUNT`.
const MAX_CHOICE_OPTIONS: usize = crate::encode::CHOICE_COUNT;
/// Cap for stack auto-resolution (presumably an iteration limit whose breach is
/// reported as `EngineErrorCode::StackAutoResolveCap` — confirm at the use site).
pub const STACK_AUTO_RESOLVE_CAP: u32 = 256;
/// Cap for check-timing quiescence (presumably an iteration limit whose breach is
/// reported as `EngineErrorCode::TriggerQuiescenceCap` — confirm at the use site).
pub const CHECK_TIMING_QUIESCENCE_CAP: u32 = 256;
/// Hand size limit (presumably enforced at end of turn — confirm at the use site).
pub const HAND_LIMIT: usize = 7;

// Numeric ids for trigger effects.
// NOTE(review): presumably these must match ids used by the card data / trigger compiler — confirm.
const TRIGGER_EFFECT_SOUL: u8 = 0;
const TRIGGER_EFFECT_DRAW: u8 = 1;
const TRIGGER_EFFECT_SHOT: u8 = 2;
const TRIGGER_EFFECT_GATE: u8 = 3;
const TRIGGER_EFFECT_BOUNCE: u8 = 4;
const TRIGGER_EFFECT_STANDBY: u8 = 5;
const TRIGGER_EFFECT_TREASURE_STOCK: u8 = 6;
const TRIGGER_EFFECT_TREASURE_MOVE: u8 = 7;
362
/// Inputs carried while compiling a trigger effect.
// NOTE(review): field meanings are inferred from names and the TRIGGER_EFFECT_* ids — confirm.
#[derive(Clone, Copy, Debug)]
struct TriggerCompileContext {
    /// Card that produced the trigger.
    source_card: CardId,
    /// Stage slot chosen for a standby effect, when applicable.
    standby_slot: Option<u8>,
    /// Choice made for a treasure effect's stock option, when applicable.
    treasure_take_stock: Option<bool>,
}
369
370mod interaction;
371mod modifiers;
372mod movement;
373mod phases;
374mod visibility;
375
376impl GameEnv {
377    fn validate_deck_lists(db: &CardDb, deck_lists: &[Vec<CardId>; 2]) {
378        for (player, deck) in deck_lists.iter().enumerate() {
379            assert!(
380                deck.len() == crate::encode::MAX_DECK,
381                "Deck {player} has {} cards (must be {})",
382                deck.len(),
383                crate::encode::MAX_DECK
384            );
385            let mut climax_count = 0usize;
386            let mut counts: std::collections::HashMap<CardId, usize> =
387                std::collections::HashMap::new();
388            for &card_id in deck {
389                let card = db
390                    .get(card_id)
391                    .unwrap_or_else(|| panic!("Deck {player} contains unknown card id {card_id}"));
392                if card.card_type == CardType::Climax {
393                    climax_count += 1;
394                }
395                *counts.entry(card_id).or_insert(0) += 1;
396            }
397            assert!(
398                climax_count <= 8,
399                "Deck {player} has {climax_count} climax cards (max 8)"
400            );
401            for (card_id, count) in counts {
402                assert!(
403                    count <= 4,
404                    "Deck {player} has {count} copies of card {card_id} (max 4)"
405                );
406            }
407        }
408    }
409
410    pub fn add_modifier(
411        &mut self,
412        source: CardId,
413        target_player: u8,
414        target_slot: u8,
415        kind: ModifierKind,
416        magnitude: i32,
417        duration: ModifierDuration,
418    ) -> Option<u32> {
419        self.add_modifier_instance(
420            source,
421            None,
422            target_player,
423            target_slot,
424            kind,
425            magnitude,
426            duration,
427            crate::state::ModifierLayer::Effect,
428        )
429    }
430
431    pub(crate) fn mark_rule_actions_dirty(&mut self) {
432        self.rule_actions_dirty = true;
433    }
434
435    pub(crate) fn mark_continuous_modifiers_dirty(&mut self) {
436        self.continuous_modifiers_dirty = true;
437    }
438
439    pub fn new(
440        db: Arc<CardDb>,
441        config: EnvConfig,
442        curriculum: CurriculumConfig,
443        seed: u64,
444        replay_config: ReplayConfig,
445        replay_writer: Option<ReplayWriter>,
446        env_id: u32,
447    ) -> Self {
448        Self::validate_deck_lists(&db, &config.deck_lists);
449        let starting_player = (seed as u8) & 1;
450        let state = GameState::new(
451            config.deck_lists[0].clone(),
452            config.deck_lists[1].clone(),
453            seed,
454            starting_player,
455        );
456        let mut curriculum = curriculum;
457        curriculum.rebuild_cache();
458        let mut replay_config = replay_config;
459        replay_config.rebuild_cache();
460        let mut env = Self {
461            db,
462            config,
463            curriculum,
464            state,
465            env_id,
466            episode_index: 0,
467            decision: None,
468            action_cache: ActionCache::new(),
469            decision_id: 0,
470            last_action_desc: None,
471            last_action_player: None,
472            last_illegal_action: false,
473            last_engine_error: false,
474            last_engine_error_code: EngineErrorCode::None,
475            last_perspective: 0,
476            pending_damage_delta: [0, 0],
477            obs_buf: vec![0; OBS_LEN],
478            slot_power_cache: [[0; crate::encode::MAX_STAGE]; 2],
479            slot_power_dirty: [[true; crate::encode::MAX_STAGE]; 2],
480            slot_power_cache_card: [[0; crate::encode::MAX_STAGE]; 2],
481            slot_power_cache_mod_turn: [[0; crate::encode::MAX_STAGE]; 2],
482            slot_power_cache_mod_battle: [[0; crate::encode::MAX_STAGE]; 2],
483            rule_actions_dirty: true,
484            continuous_modifiers_dirty: true,
485            last_rule_action_phase: Phase::Stand,
486            replay_config,
487            replay_writer,
488            replay_actions: Vec::new(),
489            replay_events: Vec::new(),
490            canonical_events: Vec::new(),
491            replay_steps: Vec::new(),
492            recording: false,
493            meta_rng: Rng64::new(seed ^ 0xABCDEF1234567890),
494            episode_seed: seed,
495            scratch_replacement_indices: Vec::new(),
496            scratch: EnvScratch::new(),
497            revealed_to_viewer: std::array::from_fn(|_| BTreeSet::new()),
498            debug: DebugConfig::default(),
499            debug_event_ring: None,
500        };
501        env.reset();
502        env
503    }
504
505    pub fn reset(&mut self) -> StepOutcome {
506        self.reset_with_obs(true)
507    }
508
509    pub fn reset_no_copy(&mut self) -> StepOutcome {
510        self.reset_with_obs(false)
511    }
512
513    pub fn canonical_events(&self) -> &[Event] {
514        &self.canonical_events
515    }
516
517    pub fn decision_id(&self) -> u32 {
518        self.decision_id
519    }
520
521    pub fn action_mask(&self) -> &[u8] {
522        &self.action_cache.mask
523    }
524
525    pub fn action_lookup(&self) -> &[Option<ActionDesc>] {
526        &self.action_cache.lookup
527    }
528
529    pub fn legal_actions(&self) -> &[ActionDesc] {
530        &self.action_cache.legal_actions
531    }
532
533    pub fn debug_event_ring_codes(&self, viewer: u8, out: &mut [u32]) -> u16 {
534        let Some(rings) = self.debug_event_ring.as_ref() else {
535            for slot in out.iter_mut() {
536                *slot = 0;
537            }
538            return 0;
539        };
540        let ring = &rings[viewer as usize % 2];
541        let count = ring.snapshot_codes(out, event_code);
542        count as u16
543    }
544
545    pub fn debug_event_ring_snapshot(&self, viewer: u8) -> Vec<ReplayEvent> {
546        let Some(rings) = self.debug_event_ring.as_ref() else {
547            return Vec::new();
548        };
549        rings[viewer as usize % 2].snapshot_events()
550    }
551
552    fn reset_with_obs(&mut self, copy_obs: bool) -> StepOutcome {
553        let episode_seed = self.meta_rng.next_u64();
554        let starting_player = if (episode_seed & 1) == 1 { 1 } else { 0 };
555        self.episode_seed = episode_seed;
556        self.episode_index = self.episode_index.wrapping_add(1);
557        Self::validate_deck_lists(&self.db, &self.config.deck_lists);
558        self.state = GameState::new(
559            self.config.deck_lists[0].clone(),
560            self.config.deck_lists[1].clone(),
561            episode_seed,
562            starting_player,
563        );
564        self.slot_power_cache = [[0; crate::encode::MAX_STAGE]; 2];
565        self.slot_power_dirty = [[true; crate::encode::MAX_STAGE]; 2];
566        self.slot_power_cache_card = [[0; crate::encode::MAX_STAGE]; 2];
567        self.slot_power_cache_mod_turn = [[0; crate::encode::MAX_STAGE]; 2];
568        self.slot_power_cache_mod_battle = [[0; crate::encode::MAX_STAGE]; 2];
569        self.rule_actions_dirty = true;
570        self.continuous_modifiers_dirty = true;
571        self.last_rule_action_phase = self.state.turn.phase;
572        self.decision = None;
573        self.action_cache.clear();
574        self.decision_id = 0;
575        self.last_action_desc = None;
576        self.last_action_player = None;
577        self.last_illegal_action = false;
578        self.last_engine_error = false;
579        self.last_engine_error_code = EngineErrorCode::None;
580        self.last_perspective = self.state.turn.starting_player;
581        self.pending_damage_delta = [0, 0];
582        if self.obs_buf.len() != OBS_LEN {
583            self.obs_buf.resize(OBS_LEN, 0);
584        }
585        self.replay_actions.clear();
586        self.replay_events.clear();
587        self.canonical_events.clear();
588        self.replay_steps.clear();
589        for set in &mut self.revealed_to_viewer {
590            set.clear();
591        }
592        if let Some(rings) = self.debug_event_ring.as_mut() {
593            for ring in rings.iter_mut() {
594                ring.clear();
595            }
596        }
597        self.recording = self.replay_config.enabled
598            && self.meta_rng.next_u32() <= self.replay_config.sample_threshold;
599        self.scratch_replacement_indices.clear();
600
601        for player in 0..2 {
602            self.shuffle_deck(player as u8);
603            self.draw_to_hand(player as u8, 5);
604        }
605
606        self.advance_until_decision();
607        self.update_action_cache();
608        self.maybe_validate_state("reset");
609        self.build_outcome_with_obs(0.0, copy_obs)
610    }
611
612    pub(crate) fn clear_status_flags(&mut self) {
613        self.last_illegal_action = false;
614        self.last_engine_error = false;
615        self.last_engine_error_code = EngineErrorCode::None;
616    }
617
618    fn run_rule_actions_if_needed(&mut self) {
619        if self.state.turn.phase != self.last_rule_action_phase {
620            self.rule_actions_dirty = true;
621            self.last_rule_action_phase = self.state.turn.phase;
622        }
623        if !self.rule_actions_dirty {
624            return;
625        }
626        self.rule_actions_dirty = false;
627        self.resolve_rule_actions_until_stable();
628        self.rule_actions_dirty = false;
629    }
630
631    pub(super) fn set_decision(&mut self, decision: Decision) {
632        self.decision = Some(decision);
633        self.decision_id = self.decision_id.wrapping_add(1);
634    }
635
636    pub(super) fn clear_decision(&mut self) {
637        self.decision = None;
638    }
639
640    pub fn set_debug_config(&mut self, debug: DebugConfig) {
641        self.debug = debug;
642        if debug.event_ring_capacity == 0 {
643            self.debug_event_ring = None;
644        } else {
645            self.debug_event_ring = Some(std::array::from_fn(|_| {
646                EventRing::new(debug.event_ring_capacity)
647            }));
648        }
649    }
650
651    pub fn apply_action_id(&mut self, action_id: usize) -> Result<StepOutcome> {
652        self.apply_action_id_internal(action_id, true)
653    }
654
655    pub fn apply_action_id_no_copy(&mut self, action_id: usize) -> Result<StepOutcome> {
656        self.apply_action_id_internal(action_id, false)
657    }
658
659    fn apply_action_id_internal(
660        &mut self,
661        action_id: usize,
662        copy_obs: bool,
663    ) -> Result<StepOutcome> {
664        self.last_illegal_action = false;
665        self.last_engine_error = false;
666        self.last_engine_error_code = EngineErrorCode::None;
667        if self.decision.is_none() {
668            return Err(anyhow!("No pending decision"));
669        }
670        self.last_perspective = self.decision.as_ref().unwrap().player;
671        let action = match self
672            .action_cache
673            .lookup
674            .get(action_id)
675            .and_then(|a| a.clone())
676        {
677            Some(action) => action,
678            None => {
679                let player = self.decision.as_ref().unwrap().player;
680                return self.handle_illegal_action(player, "Invalid action id", copy_obs);
681            }
682        };
683        self.apply_action_internal(action, copy_obs)
684    }
685
686    pub fn apply_action(&mut self, action: ActionDesc) -> Result<StepOutcome> {
687        self.apply_action_internal(action, true)
688    }
689
    /// Applies `action` for the pending decision, enforcing the configured error
    /// policy and recording the step when recording or state validation is on.
    ///
    /// # Errors
    /// Under `ErrorPolicy::Strict`, an error from `apply_action_impl` is returned
    /// unchanged. Errors from `handle_illegal_action` are also propagated.
    fn apply_action_internal(&mut self, action: ActionDesc, copy_obs: bool) -> Result<StepOutcome> {
        // Capture actor and decision kind up front: the decision may be replaced
        // or cleared while the action is applied.
        let acting_player = self
            .decision
            .as_ref()
            .map(|d| d.player)
            .unwrap_or(self.last_perspective);
        self.last_perspective = acting_player;
        self.pending_damage_delta = [0, 0];
        let decision_kind = self
            .decision
            .as_ref()
            .map(|d| d.kind)
            .unwrap_or(DecisionKind::Main);
        // Kept for logging after `action` has been moved into `apply_action_impl`.
        let action_clone = action.clone();
        // When state validation is on, recompute the legal set and reject
        // anything outside it before touching the state.
        if self.should_validate_state() {
            if let Some(decision) = &self.decision {
                let legal = crate::legal::legal_actions_cached(
                    &self.state,
                    decision,
                    &self.db,
                    &self.curriculum,
                    self.curriculum.allowed_card_sets_cache.as_ref(),
                );
                if !legal.contains(&action_clone) {
                    // Early return: this path skips the action/step logging below.
                    return self.handle_illegal_action(
                        decision.player,
                        "Action not in legal set",
                        copy_obs,
                    );
                }
            }
        }
        let outcome = match self.apply_action_impl(action, copy_obs) {
            Ok(outcome) => Ok(outcome),
            Err(err) => match self.config.error_policy {
                // Strict: surface the error to the caller (via the `?` below).
                ErrorPolicy::Strict => Err(err),
                // LenientTerminate: flag the error and end the game as a win
                // for the opponent of the acting player.
                ErrorPolicy::LenientTerminate => {
                    self.last_engine_error = true;
                    self.last_engine_error_code = EngineErrorCode::ActionError;
                    self.last_perspective = acting_player;
                    self.state.terminal = Some(TerminalResult::Win {
                        winner: 1 - acting_player,
                    });
                    self.decision = None;
                    self.update_action_cache();
                    Ok(self
                        .build_outcome_with_obs(self.terminal_reward_for(acting_player), copy_obs))
                }
                // LenientNoop: flag the error and report a zero-reward outcome
                // without terminating.
                ErrorPolicy::LenientNoop => {
                    self.last_engine_error = true;
                    self.last_engine_error_code = EngineErrorCode::ActionError;
                    self.last_perspective = acting_player;
                    self.update_action_cache();
                    Ok(self.build_outcome_with_obs(0.0, copy_obs))
                }
            },
        }?;
        // Only reached on success or a lenient recovery; a strict error has
        // already returned above, so it is never logged here.
        if self.recording || self.should_validate_state() {
            self.log_action(acting_player, action_clone);
            self.replay_steps.push(StepMeta {
                actor: acting_player,
                decision_kind,
                illegal_action: self.last_illegal_action,
                engine_error: self.last_engine_error,
            });
        }
        Ok(outcome)
    }
758
759    fn apply_action_impl(&mut self, action: ActionDesc, copy_obs: bool) -> Result<StepOutcome> {
760        let decision = self
761            .decision
762            .clone()
763            .ok_or_else(|| anyhow!("No decision to apply"))?;
764        self.last_perspective = decision.player;
765        self.last_action_desc = Some(action.clone());
766        self.last_action_player = Some(decision.player);
767
768        let mut reward = 0.0f32;
769
770        if action == ActionDesc::Concede {
771            self.log_event(Event::Concede {
772                player: decision.player,
773            });
774            self.state.terminal = Some(TerminalResult::Win {
775                winner: 1 - decision.player,
776            });
777            self.decision = None;
778            self.state.turn.decision_count += 1;
779            self.update_action_cache();
780            self.maybe_validate_state("post_concede");
781            reward += self.compute_reward(decision.player, &self.pending_damage_delta);
782            return Ok(self.build_outcome_with_obs(reward, copy_obs));
783        }
784
785        match decision.kind {
786            DecisionKind::Mulligan => match action {
787                ActionDesc::MulliganSelect { hand_index } => {
788                    let p = decision.player as usize;
789                    let hi = hand_index as usize;
790                    if hi >= self.state.players[p].hand.len() {
791                        return self.handle_illegal_action(
792                            decision.player,
793                            "Mulligan hand index out of range",
794                            copy_obs,
795                        );
796                    }
797                    if hi >= crate::encode::MAX_HAND {
798                        return self.handle_illegal_action(
799                            decision.player,
800                            "Mulligan hand index exceeds encoding",
801                            copy_obs,
802                        );
803                    }
804                    let bit = 1u64 << hi;
805                    let current = &mut self.state.turn.mulligan_selected[p];
806                    if *current & bit != 0 {
807                        *current &= !bit;
808                    } else {
809                        *current |= bit;
810                    }
811                }
812                ActionDesc::MulliganConfirm => {
813                    let p = decision.player as usize;
814                    let hand_len = self.state.players[p].hand.len();
815                    let mut indices: Vec<usize> = Vec::new();
816                    let mask = self.state.turn.mulligan_selected[p];
817                    for idx in 0..hand_len.min(crate::encode::MAX_HAND) {
818                        if mask & (1u64 << idx) != 0 {
819                            indices.push(idx);
820                        }
821                    }
822                    indices.sort_by(|a, b| b.cmp(a));
823                    for idx in indices.iter().copied() {
824                        if idx >= self.state.players[p].hand.len() {
825                            continue;
826                        }
827                        let card = self.state.players[p].hand.remove(idx);
828                        let from_slot = if idx <= u8::MAX as usize {
829                            Some(idx as u8)
830                        } else {
831                            None
832                        };
833                        self.move_card_between_zones(
834                            p as u8,
835                            card,
836                            Zone::Hand,
837                            Zone::WaitingRoom,
838                            from_slot,
839                            None,
840                        );
841                    }
842                    let draw_count = indices.len();
843                    if draw_count > 0 {
844                        self.draw_to_hand(p as u8, draw_count);
845                    }
846                    self.state.turn.mulligan_done[p] = true;
847                    self.state.turn.mulligan_selected[p] = 0;
848                }
849                _ => {
850                    return self.handle_illegal_action(
851                        decision.player,
852                        "Invalid mulligan action",
853                        copy_obs,
854                    )
855                }
856            },
857            DecisionKind::Clock => {
858                match action {
859                    ActionDesc::Pass => {
860                        self.log_event(Event::Clock {
861                            player: decision.player,
862                            card: None,
863                        });
864                    }
865                    ActionDesc::Clock { hand_index } => {
866                        let p = decision.player as usize;
867                        let hi = hand_index as usize;
868                        if hi >= self.state.players[p].hand.len() {
869                            return self.handle_illegal_action(
870                                decision.player,
871                                "Clock hand index out of range",
872                                copy_obs,
873                            );
874                        }
875                        let card = self.state.players[p].hand.remove(hi);
876                        let card_id = card.id;
877                        self.move_card_between_zones(
878                            decision.player,
879                            card,
880                            Zone::Hand,
881                            Zone::Clock,
882                            Some(hand_index),
883                            None,
884                        );
885                        self.log_event(Event::Clock {
886                            player: decision.player,
887                            card: Some(card_id),
888                        });
889                        self.draw_to_hand(decision.player, 2);
890                        self.check_level_up(decision.player);
891                    }
892                    _ => {
893                        return self.handle_illegal_action(
894                            decision.player,
895                            "Invalid clock action",
896                            copy_obs,
897                        )
898                    }
899                }
900                self.state.turn.phase_step = 2;
901            }
902            DecisionKind::Main => match action {
903                ActionDesc::Pass => {
904                    if self.curriculum.enable_priority_windows {
905                        self.state.turn.main_passed = true;
906                        if self.state.turn.priority.is_none() {
907                            self.enter_timing_window(TimingWindow::MainWindow, decision.player);
908                        }
909                    } else {
910                        self.state.turn.main_passed = false;
911                        self.state.turn.phase = Phase::Climax;
912                        self.state.turn.phase_step = 0;
913                    }
914                }
915                ActionDesc::MainPlayCharacter {
916                    hand_index,
917                    stage_slot,
918                } => {
919                    if let Err(err) = self.play_character(decision.player, hand_index, stage_slot) {
920                        return self.handle_illegal_action(
921                            decision.player,
922                            &err.to_string(),
923                            copy_obs,
924                        );
925                    }
926                }
927                ActionDesc::MainPlayEvent { hand_index } => {
928                    if let Err(err) = self.play_event(decision.player, hand_index) {
929                        return self.handle_illegal_action(
930                            decision.player,
931                            &err.to_string(),
932                            copy_obs,
933                        );
934                    }
935                }
936                ActionDesc::MainMove { from_slot, to_slot } => {
937                    let p = decision.player as usize;
938                    let fs = from_slot as usize;
939                    let ts = to_slot as usize;
940                    if fs >= self.state.players[p].stage.len()
941                        || ts >= self.state.players[p].stage.len()
942                        || fs == ts
943                    {
944                        return self.handle_illegal_action(
945                            decision.player,
946                            "Invalid move slots",
947                            copy_obs,
948                        );
949                    }
950                    if self.state.players[p].stage[fs].card.is_none() {
951                        return self.handle_illegal_action(
952                            decision.player,
953                            "Move requires a source slot with a card",
954                            copy_obs,
955                        );
956                    }
957                    self.state.players[p].stage.swap(fs, ts);
958                    self.remove_modifiers_for_slot(decision.player, from_slot);
959                    self.remove_modifiers_for_slot(decision.player, to_slot);
960                    self.mark_slot_power_dirty(decision.player, from_slot);
961                    self.mark_slot_power_dirty(decision.player, to_slot);
962                    self.mark_rule_actions_dirty();
963                    self.mark_continuous_modifiers_dirty();
964                }
965                ActionDesc::MainActivateAbility {
966                    slot,
967                    ability_index,
968                } => {
969                    let _ = (slot, ability_index);
970                    return self.handle_illegal_action(
971                        decision.player,
972                        "Activated abilities only via priority window",
973                        copy_obs,
974                    );
975                }
976                _ => {
977                    return self.handle_illegal_action(
978                        decision.player,
979                        "Invalid main action",
980                        copy_obs,
981                    )
982                }
983            },
984            DecisionKind::Climax => match action {
985                ActionDesc::Pass => {
986                    self.state.turn.phase_step = 2;
987                    if self.curriculum.enable_priority_windows {
988                        self.enter_timing_window(TimingWindow::ClimaxWindow, decision.player);
989                    }
990                }
991                ActionDesc::ClimaxPlay { hand_index } => {
992                    if let Err(err) = self.play_climax(decision.player, hand_index) {
993                        return self.handle_illegal_action(
994                            decision.player,
995                            &err.to_string(),
996                            copy_obs,
997                        );
998                    }
999                    self.state.turn.phase_step = 2;
1000                    if self.curriculum.enable_priority_windows {
1001                        self.enter_timing_window(TimingWindow::ClimaxWindow, decision.player);
1002                    }
1003                }
1004                _ => {
1005                    return self.handle_illegal_action(
1006                        decision.player,
1007                        "Invalid climax action",
1008                        copy_obs,
1009                    )
1010                }
1011            },
1012            DecisionKind::AttackDeclaration => match action {
1013                ActionDesc::Pass => {
1014                    if self.curriculum.enable_encore {
1015                        self.queue_encore_requests();
1016                    } else {
1017                        self.cleanup_reversed_to_waiting_room();
1018                    }
1019                    self.state.turn.phase = Phase::End;
1020                    self.state.turn.phase_step = 0;
1021                    self.state.turn.attack_phase_begin_done = false;
1022                    self.state.turn.attack_decl_check_done = false;
1023                }
1024                ActionDesc::Attack { slot, attack_type } => {
1025                    if let Err(err) = self.declare_attack(decision.player, slot, attack_type) {
1026                        return self.handle_illegal_action(
1027                            decision.player,
1028                            &err.to_string(),
1029                            copy_obs,
1030                        );
1031                    }
1032                }
1033                _ => {
1034                    return self.handle_illegal_action(
1035                        decision.player,
1036                        "Invalid attack action",
1037                        copy_obs,
1038                    )
1039                }
1040            },
1041            DecisionKind::LevelUp => match action {
1042                ActionDesc::LevelUp { index } => {
1043                    if self.state.turn.pending_level_up != Some(decision.player) {
1044                        return self.handle_illegal_action(
1045                            decision.player,
1046                            "No pending level up",
1047                            copy_obs,
1048                        );
1049                    }
1050                    if let Err(err) = self.resolve_level_up(decision.player, index) {
1051                        return self.handle_illegal_action(
1052                            decision.player,
1053                            &err.to_string(),
1054                            copy_obs,
1055                        );
1056                    }
1057                }
1058                _ => {
1059                    return self.handle_illegal_action(
1060                        decision.player,
1061                        "Invalid level up action",
1062                        copy_obs,
1063                    )
1064                }
1065            },
1066            DecisionKind::Encore => match action {
1067                ActionDesc::EncorePay { slot } => {
1068                    if let Err(err) = self.resolve_encore(decision.player, slot, true) {
1069                        return self.handle_illegal_action(
1070                            decision.player,
1071                            &err.to_string(),
1072                            copy_obs,
1073                        );
1074                    }
1075                }
1076                ActionDesc::EncoreDecline { slot } => {
1077                    if let Err(err) = self.resolve_encore(decision.player, slot, false) {
1078                        return self.handle_illegal_action(
1079                            decision.player,
1080                            &err.to_string(),
1081                            copy_obs,
1082                        );
1083                    }
1084                }
1085                _ => {
1086                    return self.handle_illegal_action(
1087                        decision.player,
1088                        "Invalid encore action",
1089                        copy_obs,
1090                    )
1091                }
1092            },
1093            DecisionKind::TriggerOrder => {
1094                let Some(order) = self.state.turn.trigger_order.clone() else {
1095                    return self.handle_illegal_action(
1096                        decision.player,
1097                        "No trigger order pending",
1098                        copy_obs,
1099                    );
1100                };
1101                if order.player != decision.player {
1102                    return self.handle_illegal_action(
1103                        decision.player,
1104                        "Trigger order player mismatch",
1105                        copy_obs,
1106                    );
1107                }
1108                match action {
1109                    ActionDesc::TriggerOrder { index } => {
1110                        let idx = index as usize;
1111                        if idx >= order.choices.len() {
1112                            return self.handle_illegal_action(
1113                                decision.player,
1114                                "Trigger order index out of range",
1115                                copy_obs,
1116                            );
1117                        }
1118                        let trigger_id = order.choices[idx];
1119                        let trigger_index = self
1120                            .state
1121                            .turn
1122                            .pending_triggers
1123                            .iter()
1124                            .position(|t| t.id == trigger_id);
1125                        let Some(trigger_index) = trigger_index else {
1126                            return self.handle_illegal_action(
1127                                decision.player,
1128                                "Trigger already resolved",
1129                                copy_obs,
1130                            );
1131                        };
1132                        let trigger = self.state.turn.pending_triggers.remove(trigger_index);
1133                        let _ = self.resolve_trigger(trigger);
1134                        self.state.turn.trigger_order = None;
1135                    }
1136                    _ => {
1137                        return self.handle_illegal_action(
1138                            decision.player,
1139                            "Invalid trigger order action",
1140                            copy_obs,
1141                        )
1142                    }
1143                }
1144            }
1145            DecisionKind::Choice => {
1146                let Some(choice_ref) = self.state.turn.choice.as_ref() else {
1147                    return self.handle_illegal_action(
1148                        decision.player,
1149                        "No choice pending",
1150                        copy_obs,
1151                    );
1152                };
1153                if choice_ref.player != decision.player {
1154                    return self.handle_illegal_action(
1155                        decision.player,
1156                        "Choice player mismatch",
1157                        copy_obs,
1158                    );
1159                }
1160                match action {
1161                    ActionDesc::ChoiceSelect { index } => {
1162                        let Some(choice) = self.state.turn.choice.take() else {
1163                            return self.handle_illegal_action(
1164                                decision.player,
1165                                "No choice pending",
1166                                copy_obs,
1167                            );
1168                        };
1169                        let idx = index as usize;
1170                        if idx >= MAX_CHOICE_OPTIONS {
1171                            return self.handle_illegal_action(
1172                                decision.player,
1173                                "Choice index out of range",
1174                                copy_obs,
1175                            );
1176                        }
1177                        let total = choice.total_candidates as usize;
1178                        let page_start = choice.page_start as usize;
1179                        let global_idx = page_start + idx;
1180                        if global_idx >= total {
1181                            return self.handle_illegal_action(
1182                                decision.player,
1183                                "Choice index out of range",
1184                                copy_obs,
1185                            );
1186                        }
1187                        let Some(option) = choice.options.get(global_idx).copied() else {
1188                            return self.handle_illegal_action(
1189                                decision.player,
1190                                "Choice option missing",
1191                                copy_obs,
1192                            );
1193                        };
1194                        if self.recording {
1195                            self.log_event(Event::ChoiceMade {
1196                                choice_id: choice.id,
1197                                player: decision.player,
1198                                reason: choice.reason,
1199                                option,
1200                            });
1201                        }
1202                        self.recycle_choice_options(choice.options);
1203                        self.apply_choice_effect(
1204                            choice.reason,
1205                            choice.player,
1206                            option,
1207                            choice.pending_trigger,
1208                        );
1209                    }
1210                    ActionDesc::ChoicePrevPage | ActionDesc::ChoiceNextPage => {
1211                        let nav = {
1212                            let Some(choice) = self.state.turn.choice.as_mut() else {
1213                                return self.handle_illegal_action(
1214                                    decision.player,
1215                                    "No choice pending",
1216                                    copy_obs,
1217                                );
1218                            };
1219                            let total = choice.total_candidates as usize;
1220                            let page_size = MAX_CHOICE_OPTIONS;
1221                            let current = choice.page_start as usize;
1222                            let new_start = match action {
1223                                ActionDesc::ChoicePrevPage => {
1224                                    if current < page_size {
1225                                        None
1226                                    } else {
1227                                        Some(current - page_size)
1228                                    }
1229                                }
1230                                ActionDesc::ChoiceNextPage => {
1231                                    if current + page_size >= total {
1232                                        None
1233                                    } else {
1234                                        Some(current + page_size)
1235                                    }
1236                                }
1237                                _ => None,
1238                            };
1239                            if let Some(new_start) = new_start {
1240                                let from_start = choice.page_start;
1241                                choice.page_start = new_start as u16;
1242                                Some((choice.id, choice.player, from_start, choice.page_start))
1243                            } else {
1244                                None
1245                            }
1246                        };
1247                        let Some((choice_id, player, from_start, to_start)) = nav else {
1248                            return self.handle_illegal_action(
1249                                decision.player,
1250                                "Choice page out of range",
1251                                copy_obs,
1252                            );
1253                        };
1254                        if self.recording {
1255                            self.log_event(Event::ChoicePageChanged {
1256                                choice_id,
1257                                player,
1258                                from_start,
1259                                to_start,
1260                            });
1261                        }
1262                    }
1263                    _ => {
1264                        return self.handle_illegal_action(
1265                            decision.player,
1266                            "Invalid choice action",
1267                            copy_obs,
1268                        )
1269                    }
1270                }
1271            }
1272        }
1273
1274        self.decision = None;
1275        self.state.turn.decision_count += 1;
1276        if self.state.turn.decision_count >= self.config.max_decisions {
1277            self.state.terminal = Some(TerminalResult::Timeout);
1278        }
1279
1280        self.advance_until_decision();
1281        self.update_action_cache();
1282        self.maybe_validate_state("post_action");
1283
1284        reward += self.compute_reward(decision.player, &self.pending_damage_delta);
1285        Ok(self.build_outcome_with_obs(reward, copy_obs))
1286    }
1287
1288    fn compute_reward(&self, perspective: u8, damage_delta: &[i32; 2]) -> f32 {
1289        let RewardConfig {
1290            terminal_win,
1291            terminal_loss,
1292            terminal_draw,
1293            enable_shaping,
1294            damage_reward,
1295        } = &self.config.reward;
1296        if let Some(term) = self.state.terminal {
1297            return match term {
1298                TerminalResult::Win { winner } => {
1299                    if winner == perspective {
1300                        *terminal_win
1301                    } else {
1302                        *terminal_loss
1303                    }
1304                }
1305                TerminalResult::Draw | TerminalResult::Timeout => *terminal_draw,
1306            };
1307        }
1308        if *enable_shaping {
1309            let mut reward = 0.0;
1310            let p = perspective as usize;
1311            let opp = 1 - p;
1312            reward += *damage_reward * damage_delta[opp] as f32;
1313            reward -= *damage_reward * damage_delta[p] as f32;
1314            return reward;
1315        }
1316        0.0
1317    }
1318
1319    fn resolve_quiescence_until_decision(&mut self) {
1320        let mut auto_resolve_steps: u32 = 0;
1321        loop {
1322            if self.state.terminal.is_some() || self.decision.is_some() {
1323                return;
1324            }
1325            self.run_rule_actions_if_needed();
1326            self.refresh_continuous_modifiers_if_needed();
1327            if let Some(player) = self.state.turn.pending_level_up {
1328                self.set_decision(Decision {
1329                    player,
1330                    kind: DecisionKind::LevelUp,
1331                    focus_slot: None,
1332                });
1333                return;
1334            }
1335            if self.handle_trigger_pipeline() {
1336                if self.decision.is_some() {
1337                    return;
1338                }
1339                continue;
1340            }
1341            if self.handle_priority_window() {
1342                if self.decision.is_some() {
1343                    return;
1344                }
1345                continue;
1346            }
1347            if !self.curriculum.enable_priority_windows
1348                && self.state.turn.priority.is_none()
1349                && self.state.turn.choice.is_none()
1350                && self.state.turn.stack_order.is_none()
1351                && !self.state.turn.stack.is_empty()
1352            {
1353                auto_resolve_steps = auto_resolve_steps.saturating_add(1);
1354                if auto_resolve_steps > CHECK_TIMING_QUIESCENCE_CAP {
1355                    self.log_event(Event::AutoResolveCapExceeded {
1356                        cap: CHECK_TIMING_QUIESCENCE_CAP,
1357                        stack_len: self.state.turn.stack.len() as u32,
1358                        window: self.state.turn.active_window,
1359                    });
1360                    self.last_engine_error = true;
1361                    self.last_engine_error_code = EngineErrorCode::TriggerQuiescenceCap;
1362                    self.state.terminal = Some(TerminalResult::Timeout);
1363                    return;
1364                }
1365                if let Some(item) = self.state.turn.stack.pop() {
1366                    self.resolve_stack_item(&item);
1367                    self.log_event(Event::StackResolved { item });
1368                    continue;
1369                }
1370            }
1371            break;
1372        }
1373    }
1374
1375    pub(crate) fn update_action_cache(&mut self) {
1376        if self.decision.is_some() {
1377            let decision_kind = self
1378                .decision
1379                .as_ref()
1380                .map(|d| d.kind)
1381                .expect("decision kind");
1382            if decision_kind == DecisionKind::AttackDeclaration
1383                && self.state.turn.derived_attack.is_none()
1384            {
1385                self.recompute_derived_attack();
1386            }
1387            let decision = self.decision.as_ref().expect("decision present");
1388            self.last_perspective = decision.player;
1389            self.action_cache.update(
1390                &self.state,
1391                decision,
1392                self.decision_id,
1393                &self.db,
1394                &self.curriculum,
1395                self.curriculum.allowed_card_sets_cache.as_ref(),
1396            );
1397        } else {
1398            self.action_cache.clear();
1399        }
1400    }
1401
1402    fn should_validate_state(&self) -> bool {
1403        if cfg!(debug_assertions) {
1404            return true;
1405        }
1406        std::env::var("WEISS_VALIDATE_STATE").ok().as_deref() == Some("1")
1407    }
1408
1409    fn maybe_validate_state(&self, context: &str) {
1410        if !self.should_validate_state() {
1411            return;
1412        }
1413        if let Err(err) = self.validate_state() {
1414            panic!("validate_state failed at {context}: {err}");
1415        }
1416    }
1417
1418    pub fn validate_state(&self) -> Result<()> {
1419        use std::collections::{HashMap, HashSet};
1420        let mut errors = Vec::new();
1421
1422        let mut counts: [HashMap<CardId, i32>; 2] = [HashMap::new(), HashMap::new()];
1423        for (owner, owner_counts) in counts.iter_mut().enumerate() {
1424            let deck_list = &self.config.deck_lists[owner];
1425            for card in deck_list.iter().copied() {
1426                *owner_counts.entry(card).or_insert(0) += 1;
1427            }
1428        }
1429
1430        fn consume(
1431            counts: &mut [HashMap<CardId, i32>; 2],
1432            errors: &mut Vec<String>,
1433            owner: u8,
1434            card: CardId,
1435            zone: &str,
1436        ) {
1437            let owner_idx = owner as usize;
1438            let entry = counts[owner_idx].entry(card).or_insert(0);
1439            *entry -= 1;
1440            if *entry < 0 {
1441                errors.push(format!("Owner {owner} has extra card {card} in {zone}"));
1442            }
1443        }
1444
1445        let mut instance_ids: HashSet<CardInstanceId> = HashSet::new();
1446        fn check_instance(
1447            instance_ids: &mut HashSet<CardInstanceId>,
1448            errors: &mut Vec<String>,
1449            card: &CardInstance,
1450            zone: &str,
1451        ) {
1452            if card.instance_id == 0 {
1453                errors.push(format!("Card instance id 0 in {zone}"));
1454                return;
1455            }
1456            if !instance_ids.insert(card.instance_id) {
1457                errors.push(format!(
1458                    "Duplicate instance id {} in {zone}",
1459                    card.instance_id
1460                ));
1461            }
1462        }
1463
1464        for zone_player in 0..2 {
1465            let p = &self.state.players[zone_player];
1466            for card in &p.deck {
1467                consume(
1468                    &mut counts,
1469                    &mut errors,
1470                    card.owner,
1471                    card.id,
1472                    &format!("p{zone_player} deck"),
1473                );
1474                check_instance(
1475                    &mut instance_ids,
1476                    &mut errors,
1477                    card,
1478                    &format!("p{zone_player} deck"),
1479                );
1480            }
1481            for card in &p.hand {
1482                consume(
1483                    &mut counts,
1484                    &mut errors,
1485                    card.owner,
1486                    card.id,
1487                    &format!("p{zone_player} hand"),
1488                );
1489                check_instance(
1490                    &mut instance_ids,
1491                    &mut errors,
1492                    card,
1493                    &format!("p{zone_player} hand"),
1494                );
1495            }
1496            for card in &p.waiting_room {
1497                consume(
1498                    &mut counts,
1499                    &mut errors,
1500                    card.owner,
1501                    card.id,
1502                    &format!("p{zone_player} waiting_room"),
1503                );
1504                check_instance(
1505                    &mut instance_ids,
1506                    &mut errors,
1507                    card,
1508                    &format!("p{zone_player} waiting_room"),
1509                );
1510            }
1511            for card in &p.clock {
1512                consume(
1513                    &mut counts,
1514                    &mut errors,
1515                    card.owner,
1516                    card.id,
1517                    &format!("p{zone_player} clock"),
1518                );
1519                check_instance(
1520                    &mut instance_ids,
1521                    &mut errors,
1522                    card,
1523                    &format!("p{zone_player} clock"),
1524                );
1525            }
1526            for card in &p.level {
1527                consume(
1528                    &mut counts,
1529                    &mut errors,
1530                    card.owner,
1531                    card.id,
1532                    &format!("p{zone_player} level"),
1533                );
1534                check_instance(
1535                    &mut instance_ids,
1536                    &mut errors,
1537                    card,
1538                    &format!("p{zone_player} level"),
1539                );
1540            }
1541            for card in &p.stock {
1542                consume(
1543                    &mut counts,
1544                    &mut errors,
1545                    card.owner,
1546                    card.id,
1547                    &format!("p{zone_player} stock"),
1548                );
1549                check_instance(
1550                    &mut instance_ids,
1551                    &mut errors,
1552                    card,
1553                    &format!("p{zone_player} stock"),
1554                );
1555            }
1556            for card in &p.memory {
1557                consume(
1558                    &mut counts,
1559                    &mut errors,
1560                    card.owner,
1561                    card.id,
1562                    &format!("p{zone_player} memory"),
1563                );
1564                check_instance(
1565                    &mut instance_ids,
1566                    &mut errors,
1567                    card,
1568                    &format!("p{zone_player} memory"),
1569                );
1570            }
1571            for card in &p.climax {
1572                consume(
1573                    &mut counts,
1574                    &mut errors,
1575                    card.owner,
1576                    card.id,
1577                    &format!("p{zone_player} climax"),
1578                );
1579                check_instance(
1580                    &mut instance_ids,
1581                    &mut errors,
1582                    card,
1583                    &format!("p{zone_player} climax"),
1584                );
1585            }
1586            for card in &p.resolution {
1587                consume(
1588                    &mut counts,
1589                    &mut errors,
1590                    card.owner,
1591                    card.id,
1592                    &format!("p{zone_player} resolution"),
1593                );
1594                check_instance(
1595                    &mut instance_ids,
1596                    &mut errors,
1597                    card,
1598                    &format!("p{zone_player} resolution"),
1599                );
1600            }
1601            for (slot_idx, slot) in p.stage.iter().enumerate() {
1602                if let Some(card) = slot.card {
1603                    consume(
1604                        &mut counts,
1605                        &mut errors,
1606                        card.owner,
1607                        card.id,
1608                        &format!("p{zone_player} stage[{slot_idx}]"),
1609                    );
1610                    check_instance(
1611                        &mut instance_ids,
1612                        &mut errors,
1613                        &card,
1614                        &format!("p{zone_player} stage[{slot_idx}]"),
1615                    );
1616                }
1617            }
1618        }
1619
1620        for (owner, owner_counts) in counts.iter().enumerate() {
1621            for (card, remaining) in owner_counts.iter() {
1622                if *remaining != 0 {
1623                    errors.push(format!(
1624                        "Owner {owner} card {card} count mismatch ({remaining})"
1625                    ));
1626                }
1627            }
1628        }
1629
1630        if let Some(decision) = &self.decision {
1631            if let Some(slot) = decision.focus_slot {
1632                if slot as usize >= self.state.players[decision.player as usize].stage.len() {
1633                    errors.push("Decision focus slot out of range".to_string());
1634                }
1635            }
1636            match decision.kind {
1637                DecisionKind::AttackDeclaration => {
1638                    if self.state.turn.attack.is_some() {
1639                        errors.push("Attack declaration while attack context active".to_string());
1640                    }
1641                }
1642                DecisionKind::LevelUp => {
1643                    if self.state.turn.pending_level_up.is_none() {
1644                        errors.push("Level up decision without pending level".to_string());
1645                    }
1646                }
1647                DecisionKind::Encore => {
1648                    let has = self
1649                        .state
1650                        .turn
1651                        .encore_queue
1652                        .iter()
1653                        .any(|r| r.player == decision.player);
1654                    if !has {
1655                        errors.push("Encore decision without reversed options".to_string());
1656                    }
1657                }
1658                DecisionKind::TriggerOrder => {
1659                    if self.state.turn.trigger_order.is_none() {
1660                        errors.push("Trigger order decision without pending order".to_string());
1661                    }
1662                }
1663                DecisionKind::Choice => {
1664                    if let Some(choice) = &self.state.turn.choice {
1665                        if choice.player != decision.player {
1666                            errors.push("Choice decision player mismatch".to_string());
1667                        }
1668                    } else {
1669                        errors.push("Choice decision without pending choice".to_string());
1670                    }
1671                }
1672                _ => {}
1673            }
1674        }
1675
1676        if self.state.turn.attack.is_some() && self.state.turn.phase != Phase::Attack {
1677            errors.push("Attack context outside Attack phase".to_string());
1678        }
1679
1680        if errors.is_empty() {
1681            return Ok(());
1682        }
1683
1684        let state_hash = crate::fingerprint::state_fingerprint(&self.state);
1685        let phase = self.state.turn.phase;
1686        let attack_step = self.state.turn.attack.as_ref().map(|c| c.step);
1687        let tail_len = 8usize;
1688        let actions_tail: Vec<String> = self
1689            .replay_actions
1690            .iter()
1691            .rev()
1692            .take(tail_len)
1693            .rev()
1694            .map(|a| format!("{a:?}"))
1695            .collect();
1696        let decisions_tail: Vec<String> = self
1697            .replay_steps
1698            .iter()
1699            .rev()
1700            .take(tail_len)
1701            .rev()
1702            .map(|s| format!("{:?}/{:?}", s.decision_kind, s.actor))
1703            .collect();
1704        let fallback_action = self
1705            .last_action_desc
1706            .as_ref()
1707            .map(|a| format!("{a:?}"))
1708            .unwrap_or_else(|| "None".to_string());
1709        let payload = format!(
1710            "seed={}\nphase={:?}\nattack_step={:?}\nlast_action={}\nactions_tail={:?}\ndecisions_tail={:?}\nstate_hash={}",
1711            self.episode_seed,
1712            phase,
1713            attack_step,
1714            fallback_action,
1715            actions_tail,
1716            decisions_tail,
1717            state_hash,
1718        );
1719        Err(anyhow!("{}\n{}", payload, errors.join("; ")))
1720    }
1721
1722    pub(crate) fn build_outcome_no_copy(&mut self, reward: f32) -> StepOutcome {
1723        self.build_outcome_with_obs(reward, false)
1724    }
1725
1726    fn build_outcome_with_obs(&mut self, reward: f32, copy_obs: bool) -> StepOutcome {
1727        let perspective = self
1728            .decision
1729            .as_ref()
1730            .map(|d| d.player)
1731            .unwrap_or(self.last_perspective);
1732        self.refresh_slot_power_cache();
1733        encode_observation_with_slot_power(
1734            &self.state,
1735            &self.db,
1736            &self.curriculum,
1737            perspective,
1738            self.decision.as_ref(),
1739            self.last_action_desc.as_ref(),
1740            self.last_action_player,
1741            self.config.observation_visibility,
1742            &self.slot_power_cache,
1743            &mut self.obs_buf,
1744        );
1745        let obs = if copy_obs {
1746            self.obs_buf.clone()
1747        } else {
1748            Vec::new()
1749        };
1750        let info = EnvInfo {
1751            obs_version: OBS_ENCODING_VERSION,
1752            action_version: ACTION_ENCODING_VERSION,
1753            decision_kind: self
1754                .decision
1755                .as_ref()
1756                .map(|d| match d.kind {
1757                    DecisionKind::Mulligan => 0,
1758                    DecisionKind::Clock => 1,
1759                    DecisionKind::Main => 2,
1760                    DecisionKind::Climax => 3,
1761                    DecisionKind::AttackDeclaration => 4,
1762                    DecisionKind::LevelUp => 5,
1763                    DecisionKind::Encore => 6,
1764                    DecisionKind::TriggerOrder => 7,
1765                    DecisionKind::Choice => 8,
1766                })
1767                .unwrap_or(-1),
1768            current_player: self.decision.as_ref().map(|d| d.player as i8).unwrap_or(-1),
1769            actor: self.last_perspective as i8,
1770            decision_count: self.state.turn.decision_count,
1771            tick_count: self.state.turn.tick_count,
1772            terminal: self.state.terminal,
1773            illegal_action: self.last_illegal_action,
1774            engine_error: self.last_engine_error,
1775            engine_error_code: self.last_engine_error_code as u8,
1776        };
1777        let truncated = matches!(self.state.terminal, Some(TerminalResult::Timeout));
1778        let terminated = matches!(
1779            self.state.terminal,
1780            Some(TerminalResult::Win { .. } | TerminalResult::Draw)
1781        );
1782        StepOutcome {
1783            obs,
1784            reward,
1785            terminated,
1786            truncated,
1787            info,
1788        }
1789    }
1790
1791    pub(crate) fn advance_until_decision(&mut self) {
1792        let mut auto_resolve_steps: u32 = 0;
1793        loop {
1794            if self.state.terminal.is_some() {
1795                break;
1796            }
1797            self.resolve_pending_losses();
1798            self.run_rule_actions_if_needed();
1799            self.refresh_continuous_modifiers_if_needed();
1800            if self.decision.is_some() {
1801                break;
1802            }
1803            if self.state.turn.tick_count >= self.config.max_ticks {
1804                self.state.terminal = Some(TerminalResult::Timeout);
1805                break;
1806            }
1807            self.state.turn.tick_count += 1;
1808
1809            if let Some(player) = self.state.turn.pending_level_up {
1810                self.set_decision(Decision {
1811                    player,
1812                    kind: DecisionKind::LevelUp,
1813                    focus_slot: None,
1814                });
1815                break;
1816            }
1817
1818            if self.handle_trigger_pipeline() {
1819                if self.decision.is_some() {
1820                    break;
1821                }
1822                continue;
1823            }
1824
1825            if self.handle_priority_window() {
1826                if self.decision.is_some() {
1827                    break;
1828                }
1829                continue;
1830            }
1831            if !self.curriculum.enable_priority_windows
1832                && self.state.turn.priority.is_none()
1833                && self.state.turn.choice.is_none()
1834                && self.state.turn.stack_order.is_none()
1835                && !self.state.turn.stack.is_empty()
1836            {
1837                auto_resolve_steps = auto_resolve_steps.saturating_add(1);
1838                if auto_resolve_steps > STACK_AUTO_RESOLVE_CAP {
1839                    self.log_event(Event::AutoResolveCapExceeded {
1840                        cap: STACK_AUTO_RESOLVE_CAP,
1841                        stack_len: self.state.turn.stack.len() as u32,
1842                        window: self.state.turn.active_window,
1843                    });
1844                    self.last_engine_error = true;
1845                    self.last_engine_error_code = EngineErrorCode::StackAutoResolveCap;
1846                    self.state.terminal = Some(TerminalResult::Timeout);
1847                    break;
1848                }
1849                if let Some(item) = self.state.turn.stack.pop() {
1850                    self.resolve_stack_item(&item);
1851                    self.log_event(Event::StackResolved { item });
1852                    continue;
1853                }
1854            }
1855
1856            if self.state.turn.stack.is_empty()
1857                && self.state.turn.pending_triggers.is_empty()
1858                && self.state.turn.choice.is_none()
1859                && self.state.turn.priority.is_none()
1860                && self.state.turn.stack_order.is_none()
1861            {
1862                self.cleanup_pending_resolution_cards();
1863            }
1864
1865            if !self.state.turn.encore_queue.is_empty() {
1866                if !self.state.turn.encore_begin_done {
1867                    self.run_check_timing(crate::db::AbilityTiming::BeginEncoreStep);
1868                    self.state.turn.encore_begin_done = true;
1869                    continue;
1870                }
1871                if self.curriculum.enable_priority_windows && !self.state.turn.encore_window_done {
1872                    self.state.turn.encore_window_done = true;
1873                    if self.state.turn.priority.is_none() {
1874                        self.enter_timing_window(
1875                            TimingWindow::EncoreWindow,
1876                            self.state.turn.active_player,
1877                        );
1878                    }
1879                    break;
1880                }
1881                if self.state.turn.encore_step_player.is_none() {
1882                    self.state.turn.encore_step_player = Some(self.state.turn.active_player);
1883                }
1884                let current = self.state.turn.encore_step_player.unwrap();
1885                let has_current = self
1886                    .state
1887                    .turn
1888                    .encore_queue
1889                    .iter()
1890                    .any(|r| r.player == current);
1891                let next_player = if has_current {
1892                    Some(current)
1893                } else {
1894                    let other = 1 - current;
1895                    if self
1896                        .state
1897                        .turn
1898                        .encore_queue
1899                        .iter()
1900                        .any(|r| r.player == other)
1901                    {
1902                        self.state.turn.encore_step_player = Some(other);
1903                        Some(other)
1904                    } else {
1905                        self.state.turn.encore_step_player = None;
1906                        None
1907                    }
1908                };
1909                if let Some(player) = next_player {
1910                    self.set_decision(Decision {
1911                        player,
1912                        kind: DecisionKind::Encore,
1913                        focus_slot: None,
1914                    });
1915                    break;
1916                }
1917            }
1918
1919            match self.state.turn.phase {
1920                Phase::Mulligan => {
1921                    if self.state.turn.mulligan_done[0] && self.state.turn.mulligan_done[1] {
1922                        self.state.turn.phase = Phase::Stand;
1923                        self.state.turn.phase_step = 0;
1924                        self.state.turn.active_player = self.state.turn.starting_player;
1925                        continue;
1926                    }
1927                    let sp = self.state.turn.starting_player as usize;
1928                    let next = if !self.state.turn.mulligan_done[sp] {
1929                        sp
1930                    } else {
1931                        1 - sp
1932                    };
1933                    self.set_decision(Decision {
1934                        player: next as u8,
1935                        kind: DecisionKind::Mulligan,
1936                        focus_slot: None,
1937                    });
1938                    break;
1939                }
1940                Phase::Stand => {
1941                    let p = self.state.turn.active_player;
1942                    match self.state.turn.phase_step {
1943                        0 => {
1944                            self.run_check_timing(crate::db::AbilityTiming::BeginTurn);
1945                            if self.state.turn.pending_level_up.is_some()
1946                                || !self.state.turn.pending_triggers.is_empty()
1947                            {
1948                                continue;
1949                            }
1950                            self.run_check_timing(crate::db::AbilityTiming::BeginStandPhase);
1951                            self.state.turn.phase_step = 1;
1952                            continue;
1953                        }
1954                        1 => {
1955                            self.resolve_stand_phase(p);
1956                            self.state.turn.phase_step = 2;
1957                            continue;
1958                        }
1959                        2 => {
1960                            self.run_check_timing(crate::db::AbilityTiming::AfterStandPhase);
1961                            self.state.turn.phase_step = 3;
1962                            continue;
1963                        }
1964                        _ => {
1965                            if self.state.turn.pending_level_up.is_some()
1966                                || !self.state.turn.pending_triggers.is_empty()
1967                            {
1968                                continue;
1969                            }
1970                            self.state.turn.phase = Phase::Draw;
1971                            self.state.turn.phase_step = 0;
1972                            continue;
1973                        }
1974                    }
1975                }
1976                Phase::Draw => {
1977                    let p = self.state.turn.active_player;
1978                    match self.state.turn.phase_step {
1979                        0 => {
1980                            self.run_check_timing(crate::db::AbilityTiming::BeginDrawPhase);
1981                            self.state.turn.phase_step = 1;
1982                            continue;
1983                        }
1984                        1 => {
1985                            self.draw_to_hand(p, 1);
1986                            self.state.turn.phase_step = 2;
1987                            continue;
1988                        }
1989                        2 => {
1990                            self.run_check_timing(crate::db::AbilityTiming::AfterDrawPhase);
1991                            self.state.turn.phase_step = 3;
1992                            continue;
1993                        }
1994                        _ => {
1995                            if self.state.turn.pending_level_up.is_some()
1996                                || !self.state.turn.pending_triggers.is_empty()
1997                            {
1998                                continue;
1999                            }
2000                            self.state.turn.phase = if self.curriculum.enable_clock_phase {
2001                                Phase::Clock
2002                            } else {
2003                                Phase::Main
2004                            };
2005                            self.state.turn.phase_step = 0;
2006                            continue;
2007                        }
2008                    }
2009                }
2010                Phase::Clock => {
2011                    if !self.curriculum.enable_clock_phase {
2012                        self.state.turn.phase = Phase::Main;
2013                        self.state.turn.phase_step = 0;
2014                        continue;
2015                    }
2016                    let p = self.state.turn.active_player;
2017                    match self.state.turn.phase_step {
2018                        0 => {
2019                            self.run_check_timing(crate::db::AbilityTiming::BeginClockPhase);
2020                            self.state.turn.phase_step = 1;
2021                            continue;
2022                        }
2023                        1 => {
2024                            self.set_decision(Decision {
2025                                player: p,
2026                                kind: DecisionKind::Clock,
2027                                focus_slot: None,
2028                            });
2029                            break;
2030                        }
2031                        2 => {
2032                            self.run_check_timing(crate::db::AbilityTiming::AfterClockPhase);
2033                            self.state.turn.phase_step = 3;
2034                            continue;
2035                        }
2036                        _ => {
2037                            if self.state.turn.pending_level_up.is_some()
2038                                || !self.state.turn.pending_triggers.is_empty()
2039                            {
2040                                continue;
2041                            }
2042                            self.state.turn.phase = Phase::Main;
2043                            self.state.turn.phase_step = 0;
2044                            continue;
2045                        }
2046                    }
2047                }
2048                Phase::Main => {
2049                    let p = self.state.turn.active_player;
2050                    if self.state.turn.phase_step == 0 {
2051                        self.run_check_timing(crate::db::AbilityTiming::BeginMainPhase);
2052                        self.state.turn.phase_step = 1;
2053                        continue;
2054                    }
2055                    self.set_decision(Decision {
2056                        player: p,
2057                        kind: DecisionKind::Main,
2058                        focus_slot: None,
2059                    });
2060                    break;
2061                }
2062                Phase::Climax => {
2063                    if !self.curriculum.enable_climax_phase {
2064                        self.state.turn.phase = Phase::Attack;
2065                        self.state.turn.phase_step = 0;
2066                        self.state.turn.attack_phase_begin_done = false;
2067                        self.state.turn.attack_decl_check_done = false;
2068                        continue;
2069                    }
2070                    let p = self.state.turn.active_player;
2071                    match self.state.turn.phase_step {
2072                        0 => {
2073                            self.run_check_timing(crate::db::AbilityTiming::BeginClimaxPhase);
2074                            self.state.turn.phase_step = 1;
2075                            continue;
2076                        }
2077                        1 => {
2078                            self.set_decision(Decision {
2079                                player: p,
2080                                kind: DecisionKind::Climax,
2081                                focus_slot: None,
2082                            });
2083                            break;
2084                        }
2085                        2 => {
2086                            self.run_check_timing(crate::db::AbilityTiming::AfterClimaxPhase);
2087                            self.state.turn.phase_step = 3;
2088                            continue;
2089                        }
2090                        _ => {
2091                            if self.state.turn.pending_level_up.is_some()
2092                                || !self.state.turn.pending_triggers.is_empty()
2093                            {
2094                                continue;
2095                            }
2096                            self.state.turn.phase = Phase::Attack;
2097                            self.state.turn.phase_step = 0;
2098                            self.state.turn.attack_phase_begin_done = false;
2099                            self.state.turn.attack_decl_check_done = false;
2100                            continue;
2101                        }
2102                    }
2103                }
2104                Phase::Attack => {
2105                    if !self.state.turn.attack_phase_begin_done {
2106                        self.run_check_timing(crate::db::AbilityTiming::BeginAttackPhase);
2107                        self.state.turn.attack_phase_begin_done = true;
2108                        continue;
2109                    }
2110                    if self.state.turn.attack.is_none() {
2111                        if !self.state.turn.attack_decl_check_done {
2112                            self.run_check_timing(
2113                                crate::db::AbilityTiming::BeginAttackDeclarationStep,
2114                            );
2115                            self.state.turn.attack_decl_check_done = true;
2116                            continue;
2117                        }
2118                        let p = self.state.turn.active_player;
2119                        self.recompute_derived_attack();
2120                        self.set_decision(Decision {
2121                            player: p,
2122                            kind: DecisionKind::AttackDeclaration,
2123                            focus_slot: None,
2124                        });
2125                        break;
2126                    }
2127                    self.resolve_attack_pipeline();
2128                }
2129                Phase::End => {
2130                    let p = self.state.turn.active_player;
2131                    if self.resolve_end_phase(p) {
2132                        self.state.turn.active_player = 1 - p;
2133                        self.state.turn.phase = Phase::Stand;
2134                        self.state.turn.phase_step = 0;
2135                    }
2136                }
2137            }
2138            self.maybe_validate_state("advance_loop");
2139        }
2140    }
2141
2142    fn card_set_allowed(&self, card: &CardStatic) -> bool {
2143        match (&self.curriculum.allowed_card_sets_cache, &card.card_set) {
2144            (None, _) => true,
2145            (Some(set), Some(set_id)) => set.contains(set_id),
2146            (Some(_), None) => false,
2147        }
2148    }
2149
2150    fn handle_illegal_action(
2151        &mut self,
2152        acting_player: u8,
2153        reason: &str,
2154        copy_obs: bool,
2155    ) -> Result<StepOutcome> {
2156        self.last_illegal_action = true;
2157        self.last_perspective = acting_player;
2158        match self.config.error_policy {
2159            ErrorPolicy::Strict => Err(anyhow!("Illegal action: {reason}")),
2160            ErrorPolicy::LenientTerminate => {
2161                let winner = 1 - acting_player;
2162                self.state.terminal = Some(TerminalResult::Win { winner });
2163                self.decision = None;
2164                self.update_action_cache();
2165                Ok(self.build_outcome_with_obs(self.terminal_reward_for(acting_player), copy_obs))
2166            }
2167            ErrorPolicy::LenientNoop => {
2168                self.update_action_cache();
2169                Ok(self.build_outcome_with_obs(0.0, copy_obs))
2170            }
2171        }
2172    }
2173
2174    pub(crate) fn terminal_reward_for(&self, perspective: u8) -> f32 {
2175        let RewardConfig {
2176            terminal_win,
2177            terminal_loss,
2178            terminal_draw,
2179            ..
2180        } = &self.config.reward;
2181        match self.state.terminal {
2182            Some(TerminalResult::Win { winner }) => {
2183                if winner == perspective {
2184                    *terminal_win
2185                } else {
2186                    *terminal_loss
2187                }
2188            }
2189            Some(TerminalResult::Draw | TerminalResult::Timeout) => *terminal_draw,
2190            None => 0.0,
2191        }
2192    }
2193}
2194
// Unit tests are declared in the `tests` submodule, compiled only for test builds.
#[cfg(test)]
mod tests;