#!/bin/sh
# Script pentru extras informatii din facturile de Uber si Bolt si pentru
# generat & incarcat facturi la e-Factura. De la 1 ianuarie 2025, este
# obligatoriu sa trimiti inclusiv facturile pentru persoane fizice, ceea ce
# necesita automatizare avand in vedere volumul mare a acestora. Eu unul prefer
# intoarcerea la dosarul cu sina.
# Script-ul a fost scris asumand ca firma pentru care se face contabilitatea
# este PFA, neplatitoare de TVA.
# Autor: 04dco <[email protected]>
# Licenta: Domeniul public
# Dependente: poppler-utils, grep, sed, head/tail, tr, jq, curl, lynx, iconv
# TODO: foloseste API Webservice de la ANAF pentru a cauta detaliile firmelor
# dupa CUI, cu fallback cand nu e disponibil, cam greu cand nu merge 80% din timp.
# Require the path of the invoice PDF as the first (and only) argument.
# "$1" is quoted so paths containing spaces do not break the test, and the
# script name is passed as data rather than as part of the printf format.
if [ -z "$1" ]; then
  printf 'Usage: %s file.pdf\n' "$0" >&2
  exit 1
fi
# Quick repeated invocation: for i in *.pdf; do invoice-extract "$i"; done
VERSION="0.4"
# INIT
# OpenSSL configuration exported for the child processes of this script
# (presumably the one that enables the hardware token used by the upload
# step -- confirm against the local setup).
export OPENSSL_CONF=/etc/openssl-etoken.conf
# County lookup table, one "Name,CODE" pair per line (names without
# diacritics). The value starts and ends with a newline. "Err,XX" is the
# first pair (presumably a marker for a failed lookup).
JUDETE='
Err,XX
Alba,AB
Arad,AR
Arges,AG
Bacau,BC
Bihor,BH
Bistrita-Nasaud,BN
Botosani,BT
Braila,BR
Brasov,BV
Buzau,BZ
Calarasi,CL
Caras-Severin,CS
Cluj,CJ
Constanta,CT
Covasna,CV
Dambovita,DB
Dolj,DJ
Galati,GL
Giurgiu,GR
Gorj,GJ
Harghita,HR
Hunedoara,HD
Ialomita,IL
Iasi,IS
Ilfov,IF
Maramures,MM
Mehedinti,MH
Mures,MS
Neamt,NT
Olt,OT
Prahova,PH
Salaj,SJ
Satu Mare,SM
Sibiu,SB
Suceava,SV
Teleorman,TR
Timis,TM
Tulcea,TL
Valcea,VL
Vaslui,VS
Vrancea,VN
Bucuresti,B
'
# XML comment delimiters emitted around the customer's PartyTaxScheme element
# of the generated invoice; with these defaults that element is commented out.
NONVATPAYER1="<!--"
NONVATPAYER2="-->"
# HTTP header handed to curl (-H) for the web lookups.
UA='User-Agent: Mozilla/5.0 (Windows NT 10.0; rv:128.0) Gecko/20100101 Firefox/128.0'
# Convert the PDF given as $1 to plain text. Every later extraction step reads
# the result from /tmp/fact-$UUID.txt.
UUID=$(cat /proc/sys/kernel/random/uuid 2>/dev/null)
# The /proc entry is Linux-specific; fall back to PID + timestamp so the
# temporary file name never degenerates to /tmp/fact-.txt.
[ -n "${UUID}" ] || UUID="$$-$(date +%s)"
# Remove the temporary text file on every exit path, including early failures.
trap 'rm -f "/tmp/fact-${UUID}.txt"' EXIT
pdftotext "$1" "/tmp/fact-${UUID}.txt" || exit 1
# My own details (the invoice issuer)
MYNAME="ION POPESCU PERSOANA FIZICA AUTORIZATA"
MYCUI="12345678"
MYADDRESS="Calea Sagului Nr. 21"
MYCITY="Timisoara"
MYSUBDIV="TM"
## Extract data from the invoice text
# XXX: CSV would be preferable since it is more stable and easier to parse, but
# Uber has no CSV export, so for now it is simpler not to bother.

# Print the invoice number found in the text file $1.
# "[^ ]*" stands in for the letters with diacritics, so the labels match with
# or without them; a bare "?" is a literal question mark in a basic regex.
extract_invoice_number() {
  grep 'Num[^ ]*rul facturii\|Invoice no\.\|Factura nr\.\|Factur[^ ]* simplificat[^ ]*' "$1" \
    | sed 's/^.*[:\.] //g;s/Factur[^ ]* simplificat[^ ]* //g' \
    | head -n1
}

# Print the first dd.mm.20yy date found in the text file $1. Only the date
# itself is printed, so text before or after it on the line cannot leak into
# the ISO conversion.
extract_invoice_date() {
  grep -o '[0-9][0-9]\.[0-9][0-9]\.20[0-9][0-9]' "$1" \
    | head -n1
}

# Convert a dd.mm.yyyy date ($1) to yyyy-mm-dd.
# The author notes that coreutils `date` does not accept the European
# dd.mm.yyyy format, hence the manual conversion.
to_iso_date() {
  printf '%s\n' "$1" \
    | awk '{split($0,a,"."); print a[3]"-"a[2]"-"a[1]}'
}

# Print the amount paid: the last line of file $1 that contains a number with
# two decimals, with "," turned into "." and the characters ' ', R, O, N
# removed.
extract_amount() {
  grep '[0-9][,\.][0-9][0-9]' "$1" \
    | tail -n1 \
    | sed 's/,/\./g' | tr -d ' RON'
}

# Invoice number
NUM=$(extract_invoice_number "/tmp/fact-${UUID}.txt")
## Invoice issue date
DATE=$(extract_invoice_date "/tmp/fact-${UUID}.txt")
ISODATE=$(to_iso_date "${DATE}")
## Amount paid
VALUE=$(extract_amount "/tmp/fact-${UUID}.txt")
## Payment instrument
# Uber currently does not state the payment instrument anywhere on its
# invoices, so there is no way to know; default to 380 (fiscal invoice) and
# hope for the best.
if grep -iq "Cash\|Numerar\|Efectivo" "/tmp/fact-${UUID}.txt"; then
  TYPE=751 # Accounting purposes only (for ANAF statistics, backed by a
           # fiscal receipt)
else
  TYPE=380 # Fiscal invoice
fi

# Print "bolt" or "uber" depending on which word occurs (case-insensitively)
# in the text file $1; "bolt" wins when both occur. Prints nothing otherwise.
detect_platform() {
  if grep -iq "bolt" "$1"; then
    echo "bolt"
  elif grep -iq "uber" "$1"; then
    echo "uber"
  fi
}

# Print the customer name from the text file $2 for platform $1, converted to
# ASCII: line 8 for bolt, line 3 for uber (the last line when the file is
# shorter). Prints nothing for an unknown platform.
extract_customer() {
  case "$1" in
    bolt)
      iconv -f UTF-8 -t ASCII//TRANSLIT "$2" \
        | head -n8 | tail -n1
      ;;
    uber)
      iconv -f UTF-8 -t ASCII//TRANSLIT "$2" \
        | head -n3 | tail -n1
      ;;
  esac
}

## Platform
PLATFORM=$(detect_platform "/tmp/fact-${UUID}.txt")
if [ -z "${PLATFORM}" ]; then
  # Previously this case made the unquoted `[ ${PLATFORM} = ... ]` test fail
  # with a syntax error; now it is reported and the name is left empty.
  echo "[!] Platforma necunoscuta (nici Bolt, nici Uber)." >&2
fi
## Customer name, converted to ASCII (the author notes e-Factura does not
## accept non-Latin characters).
CUSTOMER=$(extract_customer "${PLATFORM}" "/tmp/fact-${UUID}.txt")
# Show what was extracted so the operator can check that the extractor works.
headline="Procesat factura ${PLATFORM} Nr. ${NUM} din ${ISODATE}"
details="valoare de ${VALUE}, cod ${TYPE}, client ${CUSTOMER}"
echo "Fisier de intrare ${1}"
echo "${headline}"
echo "In ${details}"
# Blank separator line after the summary.
echo ""
# Append the same summary, timestamped, to a log so it stays clear which
# invoices have been processed.
echo "$(date --iso-8601=minutes) ${headline} in ${details}" \
  >> ~/.cache/invoice-extract.log
## CUI handling

# Succeed when the name $1 looks like a company rather than a private person.
# The name is converted to ASCII and my own business name (MYNAME, when set)
# is removed before the keywords are searched case-insensitively.
# The body runs in a subshell so the helper variable stays local.
is_company_name() (
  name=$(printf '%s\n' "$1" | iconv -f UTF-8 -t ASCII//TRANSLIT)
  if [ -n "${MYNAME}" ]; then
    name=$(printf '%s\n' "${name}" | sed "s/${MYNAME}//g")
  fi
  printf '%s\n' "${name}" \
    | grep -iq "SRL\|S.R.L.\| ROMANIA\|CABINET\|COOPERATIV\|PFA\| AUTORIZATA\| INTREPRINDERE\| EXPERT\|AVOCAT\|CONTABIL"
)

# Query listafirme.ro with the already URL-encoded search term $1 and print
# the digits of the first "-<digits>/" fragment in the response (empty when
# there is none).
lookup_cui() {
  curl -sH "$UA" "https://www.listafirme.ro/search.asp?q=$1" \
    | grep -o -e '-[0-9]*/' \
    | head -n1 | tr -d '\-/'
}

# Extract the CUI from the invoice; my own CUI is stripped from the text first
# so it cannot be mistaken for the customer's.
CUI=$(sed "s/${MYCUI}//g" "/tmp/fact-${UUID}.txt" \
  | grep -i 'CIF\|Cod unic de inregistrare' \
  | head -n1 \
  | sed 's/^.*: //g')
if [ -z "${CUI}" ]; then
  # The CUI is not on the invoice; search by company name.
  # XXX: jq is used here only to URL-encode the name and nowhere else. Either
  # move to JSON-only APIs or get rid of it.
  URLSAFECUST=$(printf '%s' "${CUSTOMER}" | jq -sRr @uri)
  if is_company_name "${CUSTOMER}"; then
    echo "[i] Clientul este firma dar factura nu are CUI, se incearca cautarea dupa nume."
    CUI=RO$(lookup_cui "${URLSAFECUST}")
  fi
  # CUI not found: retry with the name cut down to its first two words.
  if [ "${CUI}" = "RO" ]; then
    echo "[i] Nu s-a gasit, caut din nou cu nume trunchiat"
    TCUSTOMER=$(printf '%s\n' "${CUSTOMER}" | awk 'NR==1{print $1,$2}')
    URLSAFECUST=$(printf '%s' "${TCUSTOMER}" | jq -sRr @uri)
    CUI=RO$(lookup_cui "${URLSAFECUST}")
  fi
  # Still nothing: give up and ask the operator.
  if [ "${CUI}" = "RO" ]; then
    echo "[!] Nu s-a gasit, cauta manual:
https://duckduckgo.com/?q=${URLSAFECUST}+CUI"
    # `read -p` is not available in POSIX sh, so the prompt is printed
    # separately.
    printf '%s' "Introdu CUI cu RO: "
    read -r CUI
  fi
fi
# Print the county code listed in the JUDETE table for county name $1
# (case-insensitive). The name is quoted so multi-word counties such as
# "Satu Mare" work; an empty name matches the first table entry.
county_code() {
  printf '%s\n' "${JUDETE}" \
    | grep -io -- "${1},[A-Z][A-Z]*" \
    | sed 's/^.*,//g' \
    | head -n1
}

# If there is still no CUI at this point, the customer is a private person.
if [ -z "${CUI}" ]; then
  ## Private person
  # Placeholder identifier used for private persons (the author remarks that
  # UBL was not designed for this).
  CUI=RO0000000000000
  SCUI=$(printf '%s\n' "${CUI}" | tr -d 'RO')
  ADDRESS="Necunoscut"
  CITY="Necunoscut"
  COUNTY="Necunoscut"
  SUBDIV="RO-TM"
  NONVATPAYER1="<!--"
  NONVATPAYER2="-->"
else
  # In legislation a CUI starting with RO denotes a VAT payer, but in UBL every
  # identifier must start with the ISO country code, so the prefix is added
  # whenever it is missing.
  SCUI=$(printf '%s\n' "${CUI}" | tr -d 'RO')
  # Stripping R/O changed nothing, so the CUI carried no RO prefix.
  if [ "${SCUI}" = "${CUI}" ]; then
    CUI="RO${CUI}"
  fi
  # TODO: VAT payer y/n -- is it even relevant when my own business is not a
  # VAT payer?
  NONVATPAYER1=""
  NONVATPAYER2=""
  # XXX: lynx was the lazy choice; curl with more parsing would avoid the extra
  # program, or curl + jq if the ANAF API worked more often.
  # The URL must be on the same line as the lynx command: on a line of its own
  # it would be executed as a separate command.
  ADDRESS=$(lynx -dump -justify -nonumbers "https://www.dupacui.ro/?cui=${SCUI}" \
    | iconv -f UTF-8 -t ASCII//TRANSLIT \
    | grep -A3 'NUME FIRMA' \
    | tr -d '\n' \
    | grep -o '../..*/.... .*,' \
    | sed 's/..\/..*\/.... //g;s/,$//g')
  # Bucharest is the exception: the city is a sector.
  if printf '%s\n' "${ADDRESS}" | grep -iq "Bucuresti"; then
    CITY=SECTOR$(printf '%s\n' "${ADDRESS}" | awk '{print $3}' | tr -d ',')
    COUNTY="Bucuresti"
    SUBDIV="RO-B"
  else
    CITY=$(printf '%s\n' "${ADDRESS}" | awk '{print $2}' | tr -d ',')
    COUNTY=$(printf '%s\n' "${ADDRESS}" | grep -io 'Judet .*$' | sed 's/^Judet //g' | head -n1)
    SUBDIV="RO-$(county_code "${COUNTY}")"
  fi
fi
echo "CUI: ${CUI} (${SCUI})"
echo "Adresa: ${ADDRESS}"
echo "Oras: ${CITY} (${SUBDIV})"
# Escape the characters that are special in XML text (&, <, >) in $1 and print
# the result, so a value such as "A & B SRL" cannot break the document.
xml_escape() {
  printf '%s\n' "$1" | sed 's/&/\&amp;/g;s/</\&lt;/g;s/>/\&gt;/g'
}

# Generate the UBL invoice document as <platform>-<invoice number>.xml in the
# current directory.
OUTFILE="${PLATFORM}-${NUM}.xml"
X_NUM=$(xml_escape "${NUM}")
X_CUSTOMER=$(xml_escape "${CUSTOMER}")
X_ADDRESS=$(xml_escape "${ADDRESS}")
X_CITY=$(xml_escape "${CITY}")
# NONVATPAYER1/NONVATPAYER2 are emitted raw on purpose: they hold either the
# XML comment delimiters or nothing.
# The xmlns:xsi value must not start with whitespace, otherwise the prefix is
# bound to a different namespace name.
cat > "${OUTFILE}" << EOF
<?xml version="1.0" encoding="UTF-8" standalone="yes"?>
<Invoice xmlns="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2"
xmlns:cbc="urn:oasis:names:specification:ubl:schema:xsd:CommonBasicComponents-2"
xmlns:cac="urn:oasis:names:specification:ubl:schema:xsd:CommonAggregateComponents-2"
xmlns:ns4="urn:oasis:names:specification:ubl:schema:xsd:CommonExtensionComponents-2"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="urn:oasis:names:specification:ubl:schema:xsd:Invoice-2 http://docs.oasis-open.org/ubl/os-UBL-2.1/xsd/maindoc/UBL-Invoice-2.1.xsd">
<cbc:CustomizationID>urn:cen.eu:en16931:2017#compliant#urn:efactura.mfinante.ro:CIUS-RO:1.0.1</cbc:CustomizationID>
<cbc:ID>${X_NUM}</cbc:ID>
<cbc:IssueDate>${ISODATE}</cbc:IssueDate>
<cbc:DueDate>${ISODATE}</cbc:DueDate>
<cbc:InvoiceTypeCode>${TYPE}</cbc:InvoiceTypeCode>
<cbc:DocumentCurrencyCode>RON</cbc:DocumentCurrencyCode>
<cac:AccountingSupplierParty>
<cac:Party>
<cac:PostalAddress>
<cbc:StreetName>${MYADDRESS}</cbc:StreetName>
<cbc:CityName>${MYCITY}</cbc:CityName>
<cbc:CountrySubentity>RO-${MYSUBDIV}</cbc:CountrySubentity>
<cac:Country>
<cbc:IdentificationCode>RO</cbc:IdentificationCode>
</cac:Country>
</cac:PostalAddress>
<cac:PartyTaxScheme>
<cbc:CompanyID>${MYCUI}</cbc:CompanyID>
<cac:TaxScheme/>
</cac:PartyTaxScheme>
<cac:PartyLegalEntity>
<cbc:RegistrationName>${MYNAME}</cbc:RegistrationName>
<cbc:CompanyID>${MYCUI}</cbc:CompanyID>
</cac:PartyLegalEntity>
</cac:Party>
</cac:AccountingSupplierParty>
<cac:AccountingCustomerParty>
<cac:Party>
<cac:PostalAddress>
<cbc:StreetName>${X_ADDRESS}</cbc:StreetName>
<cbc:CityName>${X_CITY}</cbc:CityName>
<cbc:CountrySubentity>${SUBDIV}</cbc:CountrySubentity>
<cac:Country>
<cbc:IdentificationCode>RO</cbc:IdentificationCode>
</cac:Country>
</cac:PostalAddress>
${NONVATPAYER1}
<cac:PartyTaxScheme>
<cbc:CompanyID>${CUI}</cbc:CompanyID>
<cac:TaxScheme>
<cbc:ID>VAT</cbc:ID>
</cac:TaxScheme>
</cac:PartyTaxScheme>
${NONVATPAYER2}
<cac:PartyLegalEntity>
<cbc:RegistrationName>${X_CUSTOMER}</cbc:RegistrationName>
<cbc:CompanyID>${CUI}</cbc:CompanyID>
</cac:PartyLegalEntity>
</cac:Party>
</cac:AccountingCustomerParty>
<cac:TaxTotal>
<cbc:TaxAmount currencyID="RON">0.00</cbc:TaxAmount>
<cac:TaxSubtotal>
<cbc:TaxableAmount currencyID="RON">${VALUE}</cbc:TaxableAmount>
<cbc:TaxAmount currencyID="RON">0.00</cbc:TaxAmount>
<cac:TaxCategory>
<cbc:ID>E</cbc:ID>
<cbc:Percent>0.00</cbc:Percent>
<cbc:TaxExemptionReasonCode>VATEX-EU-O</cbc:TaxExemptionReasonCode>
<cac:TaxScheme>
<cbc:ID>VAT</cbc:ID>
</cac:TaxScheme>
</cac:TaxCategory>
</cac:TaxSubtotal>
</cac:TaxTotal>
<cac:LegalMonetaryTotal>
<cbc:LineExtensionAmount currencyID="RON">${VALUE}</cbc:LineExtensionAmount>
<cbc:TaxExclusiveAmount currencyID="RON">${VALUE}</cbc:TaxExclusiveAmount>
<cbc:TaxInclusiveAmount currencyID="RON">${VALUE}</cbc:TaxInclusiveAmount>
<cbc:PrepaidAmount currencyID="RON">${VALUE}</cbc:PrepaidAmount>
<cbc:PayableRoundingAmount currencyID="RON">${VALUE}</cbc:PayableRoundingAmount>
<cbc:PayableAmount currencyID="RON">${VALUE}</cbc:PayableAmount>
</cac:LegalMonetaryTotal>
<cac:InvoiceLine>
<cbc:ID>1</cbc:ID>
<cbc:InvoicedQuantity unitCode="H87">1.00</cbc:InvoicedQuantity>
<cbc:LineExtensionAmount currencyID="RON">${VALUE}</cbc:LineExtensionAmount>
<cac:Item>
<cbc:Name>Transport alternativ</cbc:Name>
<cac:ClassifiedTaxCategory>
<cbc:ID>E</cbc:ID>
<cbc:Percent>0.00</cbc:Percent>
<cac:TaxScheme>
<cbc:ID>VAT</cbc:ID>
</cac:TaxScheme>
</cac:ClassifiedTaxCategory>
</cac:Item>
<cac:Price>
<cbc:PriceAmount currencyID="RON">${VALUE}</cbc:PriceAmount>
</cac:Price>
</cac:InvoiceLine>
</Invoice>
<!-- XML generated with invoice-extract v${VERSION} -->
EOF
## Validate the generated document against the ANAF API
# Validation is attempted several times because the API does not reliably
# answer with either success or failure; the author notes that since at least
# 2025-06-06 it randomly resets the connection.
VALIDATED=no
for i in 1 2 3 4 5 6 7 8 9 10; do
  echo "Incercare verificare $i"
  VALID=$(curl -sX POST \
    https://webservicesp.anaf.ro/prod/FCTEL/rest/validare/FACT1 \
    -H "Content-Type: text/plain" --data-binary "@${OUTFILE}")
  # "ok" is checked first, so it wins if both markers appear in the reply.
  case "${VALID}" in
    *'"ok"'*)
      # Valid: carry on to the upload step.
      echo "Document XML valid: ${VALID}"
      VALIDATED=yes
      break
      ;;
    *'"nok"'*)
      # Invalid: complain and stop.
      echo "Document XML invalid: ${VALID}"
      echo "Posibila problema cu extractorul informatiilor sau lookup la CUI?"
      echo "@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@"
      # Cleanup
      rm -f "/tmp/fact-${UUID}.txt"
      exit 1
      ;;
    *)
      # Neither verdict in the reply: treat it as a failed call and retry.
      echo "Incarcarea de validare a esuat, se incearca din nou."
      sleep 3
      ;;
  esac
done
# As before, the script continues when no attempt produced a verdict, but it
# now says so instead of falling through silently.
if [ "${VALIDATED}" != "yes" ]; then
  echo "[!] Validarea nu a putut fi efectuata dupa 10 incercari, se continua fara validare." >&2
fi
# XXX: This call fails if the token is not plugged into the USB port; it also
# asks for the token password on every invocation unless it is given through
# PIN= in the OpenSSL configuration. Still preferred over the OAuth/JWT
# authentication flow, which is far more complex.
printf '%s' "Incarc la e-Factura (Y/N)?: "
# Interactive single-key confirmation, currently disabled:
#old_stty_cfg=$(stty -g)
#stty raw -echo ; answer=$(head -c 1) ; stty $old_stty_cfg
answer=y # OVERRIDE
case "${answer}" in
  [Yy]*)
    # Try several times in case the token is not connected, the password is
    # wrong or the upload fails.
    UPLOADED=no
    for i in 1 2 3 4 5 6 7 8 9 10; do
      echo "Incercarea $i"
      # The URL is a single-line string: a leading newline inside the quotes
      # would make it an invalid URL.
      UPLOAD=$(curl -s -X POST -E \
        'pkcs11:model=eToken;manufacturer=SafeNet%2C%20Inc.;serial=029b2e03;token=VCPFA' \
        "https://webserviceapl.anaf.ro/prod/FCTEL/rest/upload?standard=UBL&cif=${MYCUI}" \
        -H "Content-Type: text/plain" --data-binary "@${OUTFILE}" | tail -n1)
      case "${UPLOAD}" in
        *index_incarcare*)
          echo "Incarcat cu succes: ${UPLOAD}"
          echo "........................................"
          UPLOADED=yes
          break
          ;;
        *)
          echo "Incarcarea a esuat, se incearca din nou."
          echo "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!"
          ;;
      esac
    done
    # Make it visible when every attempt failed; the exit status is unchanged.
    if [ "${UPLOADED}" != "yes" ]; then
      echo "[!] Incarcarea a esuat dupa 10 incercari." >&2
    fi
    ;;
  *)
    # No `break` here: this branch is not inside a loop.
    echo "Upload anulat."
    ;;
esac
# Cleanup
rm -f "/tmp/fact-${UUID}.txt"
exit 0