Faster AVX2 prompt processing for k-quants and IQ4_XS (#394) #241

Workflow file for this run

name: CI
on:
  push:
    branches: [ master, main, fix ]
  pull_request:
    branches: [ master, main, fix ]
jobs:
  Tests:
    timeout-minutes: 60
    runs-on: ${{ matrix.os }}
    strategy:
      fail-fast: false
      matrix:
        os: [ macos-latest ] # ubuntu-latest and windows-latest are currently non-functional and still need adaptation.
    steps:
      - name: Checkout Repository
        uses: actions/checkout@v4
      - name: Build
        shell: bash
        run: |
          curl -L -o make https://cosmo.zip/pub/cosmos/bin/make
          chmod +x make
          ./make -j8
      - name: Make Llamafile
        shell: bash
        run: |
          curl -L -o tinyllama.gguf https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q4_0.gguf
          cat << EoF > .args
          -m
          tinyllama.gguf
          ...
          EoF
          cp o//llama.cpp/main/main \
            tinyllama.llamafile
          o//llamafile/zipalign -j0 \
            tinyllama.llamafile \
            tinyllama.gguf \
            .args
      - name: Execute LLM CLI CPU # GitHub Actions runners don't have "support_simdgroup_reduction" for RMS_NORM, so the test runs CPU-only (-ngl 0).
        shell: bash
        run: |
          ./tinyllama.llamafile -e -p '## Famous Speech\n\nFour score and seven' -n 50 -ngl 0
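
The same steps can be reproduced outside CI. Below is a minimal sketch based only on the commands in the workflow above, assuming a macOS or Linux shell at the repository root; the .args file here contains just the -m argument visible in the log, since the workflow's remaining arguments are elided.

# Fetch the Cosmopolitan make and build the project, as in the Build step.
curl -L -o make https://cosmo.zip/pub/cosmos/bin/make
chmod +x make
./make -j8

# Download a small GGUF model and package it into a self-contained llamafile.
curl -L -o tinyllama.gguf https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v0.3-GGUF/resolve/main/tinyllama-1.1b-chat-v0.3.Q4_0.gguf
printf -- '-m\ntinyllama.gguf\n' > .args   # only the arguments shown in the log; the workflow adds more
cp o//llama.cpp/main/main tinyllama.llamafile
o//llamafile/zipalign -j0 tinyllama.llamafile tinyllama.gguf .args

# CPU-only smoke test: -ngl 0 keeps all layers off the GPU, -n 50 generates 50 tokens.
./tinyllama.llamafile -e -p '## Famous Speech\n\nFour score and seven' -n 50 -ngl 0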